In [3]:
import os
import glob
In [4]:
os.getcwd()
Out[4]:
In [5]:
path = 'C:\\Users\\Dell\\Downloads'
In [6]:
extension = 'csv'
os.chdir(path)
In [7]:
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)
In [8]:
import pandas as pd
In [40]:
# Read the iris CSV from the given URL into a DataFrame.
iris=pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv")
In [45]:
# Print the column names, dtypes and non-null counts.
iris.info()
In [104]:
import seaborn as sns
%matplotlib inline
In [46]:
import matplotlib.pyplot as plt
%matplotlib inline
In [49]:
# Red bars: one bar per row, placed at the sepal length, as tall as the sepal width.
plt.bar(iris['Sepal.Length'],iris['Sepal.Width'],label="bar1",color='r')
Out[49]:
In [50]:
# Green bars: same chart for the petal measurements.
# NOTE(review): the label is still "bar1", same as the sepal chart — confirm this is intended.
plt.bar(iris['Petal.Length'],iris['Petal.Width'],label="bar1",color='g')
Out[50]:
In [52]:
# Build the figure, both axes and both boxplots in ONE cell.
# With the inline backend a figure is rendered and closed when the cell that
# created it finishes, so drawing on ax1/ax2 from later cells and calling
# plt.show() there displays nothing.
fig=plt.figure()
ax1=fig.add_subplot(1,2,1)  # left panel of a 1x2 grid
ax2=fig.add_subplot(1,2,2)  # right panel of a 1x2 grid
# Left panel: distribution of sepal length.
ax1.boxplot(iris['Sepal.Length'])
ax1.set_xlabel('Sepal.Length')
# Right panel: distribution of petal length.
ax2.boxplot(iris['Petal.Length'])
ax2.set_xlabel('Petal.Length')
plt.show()
In [60]:
# Box plot of the petal length column.
plt.boxplot(iris['Petal.Length'])
Out[60]:
In [61]:
# Histogram of the sepal length column (default binning).
plt.hist(iris['Sepal.Length'])
Out[61]:
In [64]:
# Scatter plot: petal length on the x axis, sepal length on the y axis.
plt.scatter(iris['Petal.Length'],iris['Sepal.Length'])
Out[64]:
In [ ]:
In [67]:
slices=pd.value_counts(iris.Species)
print(slices)
In [71]:
labels=pd.Series(iris.Species.unique())
print(labels)
In [ ]:
colors=['r','y','g']
In [83]:
plt.pie(pd.value_counts(iris.Species),labels=['virginica','versicolor','setosa'],colors=['r','y','g'],autopct='%1.1f%%')
Out[83]:
In [84]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Seed NumPy's global random generator with the sum of the character codes of "aesthetics".
np.random.seed(sum(map(ord, "aesthetics")))
In [9]:
# List the entries of the current working directory.
os.listdir()
Out[9]:
In [85]:
def sinplot(flip=1):
    """Plot six phase-shifted sine curves with ``plt.plot``.

    Curve ``i`` (for ``i`` in 1..6) is ``sin(x + 0.5 * i) * (7 - i) * flip``,
    evaluated at 100 evenly spaced ``x`` values between 0 and 14, so later
    curves are shifted further and have smaller amplitude.

    Parameters
    ----------
    flip : number, optional
        Multiplier applied to every curve; ``-1`` mirrors the curves about
        the x axis. Defaults to ``1``.
    """
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)
In [86]:
# Baseline: draw the sine curves with the styling currently in effect.
sinplot()
In [87]:
# Switch seaborn to the "white" style for subsequent figures.
sns.set_style("white")
In [88]:
sinplot()
In [89]:
# Switch to the "ticks" style and redraw for comparison.
sns.set_style("ticks")
sinplot()
In [91]:
# Display the current default colour palette as a strip of swatches.
sns.palplot(sns.color_palette())
In [92]:
# Eight colours from the "hls" palette.
sns.palplot(sns.color_palette("hls",8))
In [93]:
# Ten colours from the "BuGn" palette.
sns.palplot(sns.color_palette("BuGn", 10))
In [94]:
sinplot()
In [95]:
# Load the BigDiamonds CSV from an absolute local path.
# NOTE(review): the path is machine-specific; it only resolves on the original author's machine.
diamonds=pd.read_csv("C:\\Users\\Dell\\Downloads\\BigDiamonds.csv\\BigDiamonds.csv")
In [96]:
# Check the type of the loaded object.
type(diamonds)
Out[96]:
In [97]:
# Number of rows.
len(diamonds)
Out[97]:
In [98]:
# Column labels.
diamonds.columns
Out[98]:
In [99]:
# (rows, columns) tuple.
diamonds.shape
Out[99]:
In [15]:
# Column dtypes and non-null counts.
diamonds.info()
In [100]:
# First rows of the frame.
diamonds.head()
Out[100]:
In [101]:
# Independent copy taken before any cleaning is applied to `diamonds`.
diamonds2=diamonds.copy()
In [102]:
# Frequency of each value in the `cut` column. Series.value_counts replaces
# the deprecated top-level pd.value_counts and returns the same result.
diamonds2.cut.value_counts()
Out[102]:
In [103]:
diamonds.describe()
Out[103]:
In [107]:
diamonds=diamonds.drop("Unnamed: 0",1)
In [105]:
diamonds=diamonds.dropna(how="any")
In [108]:
sns.distplot(diamonds.price, bins=20, kde=True, rug=False)
Out[108]:
In [110]:
sns.distplot(diamonds.price, bins=20, kde=False, rug=False)
Out[110]:
In [111]:
sns.boxplot(x="color", y="price", data=diamonds)
Out[111]:
In [112]:
sns.jointplot('price','carat',data=diamonds2)
Out[112]:
In [113]:
sns.factorplot(x="color", y="price",
col="cut", data=diamonds, kind="box", size=4, aspect=.5);
In [114]:
from ggplot import *
In [116]:
# Define the base plot in this cell: the cell that originally assigned `p`
# appears further down, so on a top-to-bottom run `p` would be undefined here.
p = ggplot(aes(x='price', y='carat'), data=diamonds)
# Scatter layer: price on x, carat on y.
p + geom_point()
Out[116]:
In [117]:
# Same scatter, faceted by the `cut` column.
p + geom_point() +facet_grid('cut')
Out[117]:
In [118]:
# Rebind `p` with points coloured by `cut`.
p = ggplot(aes(x='price', y='carat',color="cut"), data=diamonds)
p + geom_point()
Out[118]:
In [115]:
# Base plot with no layers: price on x, carat on y.
# NOTE(review): execution count 115 is lower than the cells above, i.e. this cell was originally run before them.
p = ggplot(aes(x='price', y='carat'), data=diamonds)
p
Out[115]:
In [39]:
# Rebind `p` with points coloured by `clarity`.
p = ggplot(aes(x='price', y='carat',color="clarity"), data=diamonds)
p + geom_point()
Out[39]:
In [ ]:
diamonds = diamonds.notnull() * 1
In [ ]:
diamonds.head()
In [ ]:
diamonds=diamonds.drop('Unnamed: 0',1)
In [ ]:
diamonds.head()
In [ ]:
# First rows of the copy taken right after loading.
diamonds2.head()
In [ ]:
# Second independent copy, taken before diamonds2 has its missing rows dropped below.
diamonds3=diamonds2.copy()
In [ ]:
# Preview with missing values replaced by the string "AJAY" (result is not stored).
diamonds2.fillna("AJAY").head()
In [ ]:
# Keep only rows without any missing value.
diamonds2=diamonds2.dropna(how="any")
In [ ]:
diamonds2.info()
In [ ]:
# Underlying values of diamonds3 as a NumPy array.
data=diamonds3.values
data
In [ ]:
diamonds3.columns
In [ ]:
# Rebuild a DataFrame from the raw array: every value of `data`, a fresh
# 0..len(data)-1 integer index, and the column labels of diamonds3.
g = pd.DataFrame(
    data=data,
    index=range(len(data)),
    columns=diamonds3.columns,
)
In [ ]:
# First rows of the rebuilt frame.
g.head()
In [ ]:
# Rows at positions 2, 3 and 4, all columns.
diamonds3.iloc[2:5,:]
In [ ]:
# All rows, columns at positions 2, 3 and 4.
diamonds3.iloc[:,2:5]
In [ ]:
# Select three columns by name and preview them.
diamonds3[['cut','color','clarity']].head()
In [ ]:
diamonds3.ix[20:40]
In [ ]:
diamonds3.corr()
In [ ]:
diamonds3.head()
In [ ]:
diamonds3.drop(diamonds3.index[[1,3]]).head()
In [ ]:
s=pd.Series(range(0,100))
In [ ]:
type(s)
In [ ]:
diamonds3.drop(diamonds3.index[[s]]).head()
In [ ]:
# Remove the name `diamonds` from the namespace.
del diamonds
In [ ]:
# Rows with carat above 0.50 and price above 3000.
diamonds3.query('carat >.50 and price >3000')
In [ ]:
# Delete the "Unnamed: 0" column in place.
# NOTE(review): running this cell a second time raises KeyError because the column is already gone.
del diamonds3["Unnamed: 0"]
In [ ]:
# Rows with price above 5000.
diamonds3.query('price >5000')
In [ ]:
# Rows whose color is "J" or whose price is above 4000.
diamonds2.query('color=="J" or price >4000')
In [ ]:
diamonds3['newvar']=1
In [ ]:
diamonds3.head()
In [ ]:
diamonds3.loc[diamonds3.price>=5000,'newvar']="Expensive"
In [ ]:
diamonds3.query('price >5000').head()
In [ ]:
# New column: price divided by carat, computed row by row.
diamonds3['ppc']=diamonds3.price/diamonds3.carat
In [ ]:
diamonds3.head()
In [ ]:
# Independent copy taken before the rows with missing values are dropped.
diamonds4=diamonds3.copy()
In [ ]:
# Keep only rows without any missing value.
diamonds3=diamonds3.dropna(how='any')
In [ ]:
diamonds3.head()
In [ ]:
os.listdir()
In [ ]:
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)
In [ ]:
f=pd.read_csv('ccFraud.csv')
Credit for this part: http://www.cs.tufts.edu/comp/150VAN/demos/DataWrangling.pdf (data from https://packages.revolutionanalytics.com/datasets/ccFraud.csv)
In [ ]:
# dtype of every column.
f.dtypes
In [ ]:
# Row index.
f.index
In [ ]:
# Column labels.
f.columns
In [ ]:
# Underlying values as a NumPy array.
f.values
In [ ]:
# Summary statistics of the numeric columns.
f.describe()
In [ ]:
# Transposed view: rows become columns.
# NOTE(review): this displays the whole frame; consider a small slice if the file is large.
f.T
In [ ]:
# Rows ordered by ascending balance. DataFrame.sort(columns=...) was removed
# from pandas; sort_values(by=...) is its replacement.
f.sort_values(by='balance')
In [ ]:
# Rows ordered by index label, descending.
f.sort_index(axis=0, ascending=False)
In [ ]:
# Columns ordered by column label.
f.sort_index(axis=1)
In [ ]:
f.head()
In [ ]:
# Last two rows.
f.tail(2)
In [ ]:
# Single column as a Series.
f['balance']
In [ ]:
# Positional row slice: rows 1 and 2.
f[1:3]
In [ ]:
# All rows, two columns selected by label.
f.loc[:,['balance' , 'gender' ]]
In [ ]:
# Same two columns via list indexing.
f[['balance' , 'gender' ]]
In [ ]:
# Boolean filter: rows whose balance exceeds 3000.
f[f['balance'] > 3000]
In [ ]: