In [1]:
import numpy as np

In [2]:
from random import randint,randrange

In [4]:
# Print 40 pseudo-random integers (one per x in 10..49), each drawn from 0..14.
for x in range(10, 50):
    print(randrange(0, 15))


4
7
3
0
9
13
5
3
6
4
14
6
7
0
9
4
8
10
14
1
14
2
7
13
14
10
14
7
0
13
3
6
14
5
13
5
11
2
4
5

In [7]:
def mynewfunction(x, y):
    """Print x**2 + 3*x**2*y + 20*y**2; the value is not returned."""
    x_squared = x**2
    z = x_squared + 3*x_squared*y + 20*y**2
    print(z)

In [9]:
mynewfunction(1,3)


190

In [11]:
mynewfunction(10,30)


27100

In [14]:
def mybadfunction(x, y):
    """Return x**2 + 3*x**2*y + 20*y**2."""
    x_squared = x**2
    return x_squared + 3*x_squared*y + 20*y**2

In [15]:
mybadfunction(1,1)


Out[15]:
24

In [16]:
scores=(23,46,69,7,5)

In [17]:
type(scores)


Out[17]:
tuple

In [22]:
sc=(46,45)

In [23]:
type(sc)


Out[23]:
tuple

In [24]:
scores+sc


Out[24]:
(23, 46, 69, 7, 5, 46, 45)

In [25]:
# Map each person's name to their favourite movie title.
favourite_movie2 = {
    'micky mouse': 'steamboat willie',
    'vijay': 'slumdog millionaire',
    'john': 'passion of christ',
    'donald': 'arthur',
}

In [26]:
type(favourite_movie2)


Out[26]:
dict

In [29]:
favourite_movie2['vijay']


Out[29]:
'slumdog millionaire'

In [30]:
import re

In [31]:
# Sample first names used by the regular-expression cells.
names = [
    "Agung", "Deja", "Brahm", "Nathan", "Ratna",
    "Naufal", "Scholly", "Siska", "Bintang", "Sandra",
]

In [32]:
# For every name, print the first match of the group "(an)" or None when absent.
an_pattern = re.compile(r'(an)')
for name in names:
    print(an_pattern.search(name))


None
None
None
<_sre.SRE_Match object; span=(4, 6), match='an'>
None
None
None
None
<_sre.SRE_Match object; span=(4, 6), match='an'>
<_sre.SRE_Match object; span=(1, 3), match='an'>

In [34]:
# For every name, print True when it contains "an" and False otherwise.
for name in names:
    print(re.search(r'(an)', name) is not None)


False
False
False
True
False
False
False
False
True
True

In [35]:
import re
import numpy as np

In [36]:
# Messy amounts: strings with "$", thousands separators or trailing blanks,
# plus one value that is already an int.
numlist = [
    "$60000",
    "$80,000",
    "30,000",
    70000,
    "55000   ",
]

In [37]:
enumerate?

In [38]:
re.sub(r"([$,])","",str("$60000"))


Out[38]:
'60000'

In [39]:
int('60000')


Out[39]:
60000

In [40]:
# Convert every entry of numlist to an int in place: take its text form,
# remove "$" and "," characters, parse it, and show the list after each step.
for i, value in enumerate(numlist):
    numlist[i] = int(re.sub(r"([$,])", "", str(value)))
    print(numlist)


[60000, '$80,000', '30,000', 70000, '55000   ']
[60000, 80000, '30,000', 70000, '55000   ']
[60000, 80000, 30000, 70000, '55000   ']
[60000, 80000, 30000, 70000, '55000   ']
[60000, 80000, 30000, 70000, 55000]

In [41]:
numlist


Out[41]:
[60000, 80000, 30000, 70000, 55000]

In [42]:
import numpy as np

In [43]:
np.mean(numlist)


Out[43]:
59000.0

In [44]:
from datetime import datetime

In [46]:
# Parse a "MonthName/day/year hour:minute<AM|PM>" string into a datetime.
datetime_object = datetime.strptime(
    'June/17/2016 1:33PM',
    '%B/%d/%Y %I:%M%p',
)

In [47]:
datetime_object


Out[47]:
datetime.datetime(2016, 6, 17, 13, 33)

In [48]:
# Parse a "<day><abbreviated month>-<year>" string; the time defaults to midnight.
date_object2 = datetime.strptime(
    "12dec-2007",
    "%d%b-%Y",
)

In [49]:
date_object2


Out[49]:
datetime.datetime(2007, 12, 12, 0, 0)

In [53]:
a=date_object2-datetime_object

In [54]:
a


Out[54]:
datetime.timedelta(-3111, 37620)

In [58]:
a.days


Out[58]:
-3111

In [60]:
a.days/30


Out[60]:
-103.7

In [61]:
from dateutil import relativedelta

In [67]:
# Calendar-aware difference between the two timestamps. The leading minus
# negates the relativedelta (this is `r = -(...)`, not an augmented assignment),
# turning the negative span date_object2 - datetime_object into a positive one.
r = -relativedelta.relativedelta(date_object2, datetime_object)

In [68]:
r.months


Out[68]:
6

In [69]:
r.years


Out[69]:
8

In [71]:
def f(x):
    """Return x**3 + 3*x**2."""
    cube = x**3
    triple_square = 3 * x**2
    return cube + triple_square

In [72]:
f(1)


Out[72]:
4

In [73]:
type(f)


Out[73]:
function

In [74]:
# Anonymous (lambda) function bound to a name: maps x to x**3 + 3*x**2.
g = lambda x: x**3 + 3 * x**2

In [75]:
g(10)


Out[75]:
1300

In [76]:
type(g)


Out[76]:
function

In [77]:
import pandas as pd

In [78]:
import os as os

In [81]:
# Load the BigDiamonds CSV into a DataFrame.
# NOTE(review): the path is an absolute location under one Windows user profile,
# so this cell will not run on another machine without editing the path.
diamonds=pd.read_csv("C:\\Users\\KOGENTIX\\Desktop\\training\\BigDiamonds.csv\\BigDiamonds.csv")

In [84]:
diamonds.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598024 entries, 0 to 598023
Data columns (total 13 columns):
Unnamed: 0      598024 non-null int64
carat           598024 non-null float64
cut             598024 non-null object
color           598024 non-null object
clarity         598024 non-null object
table           598024 non-null float64
depth           598024 non-null float64
cert            598024 non-null object
measurements    597978 non-null object
price           597311 non-null float64
x               596209 non-null float64
y               596172 non-null float64
z               595480 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 59.3+ MB

In [96]:
# Download the UCI "adult" data file straight from its URL into a DataFrame.
# header=None: the first line is read as data, so the columns receive default
# integer labels until names are assigned explicitly.
adult=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",header=None)

In [98]:
#pd.read_csv?
'''this is 
a multiple
line comment
'''


Out[98]:
'this is \na multiple\nline comment\n'

In [99]:
adult.head()


Out[99]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
0 39 State-gov 77516 Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private 215646 HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 234721 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private 338409 Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K

In [101]:
adult.columns


Out[101]:
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], dtype='int64')

In [102]:
# Give the header-less adult data set descriptive column labels.
# Labels are written without leading/trailing whitespace so that lookups such
# as adult["age"] or adult["workclass"] work exactly as typed (a label like
# "age " with a trailing space would make adult["age"] raise KeyError).
adult.columns = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "income",
]

In [103]:
adult.head()


Out[103]:
age workclass fnlwgt education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country income
0 39 State-gov 77516 Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private 215646 HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 234721 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private 338409 Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K

In [105]:
# Load the World Bank projects file into a DataFrame; lines=True makes pandas
# read the file as one JSON object per line.
# NOTE(review): the path is an absolute location under one Windows user profile,
# so this cell will not run on another machine without editing the path.
wb=pd.read_json("C:\\Users\\KOGENTIX\\Desktop\\training\\world_bank.json",lines=True)

In [106]:
wb.head()


Out[106]:
_id approvalfy board_approval_month boardapprovaldate borrower closingdate country_namecode countrycode countryname countryshortname ... sectorcode source status supplementprojectflg theme1 theme_namecode themecode totalamt totalcommamt url
0 {'$oid': '52b213b38594d8a2be17c780'} 1999 November 2013-11-12T00:00:00Z FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA 2018-07-07T00:00:00Z Federal Democratic Republic of Ethiopia!$!ET ET Federal Democratic Republic of Ethiopia Ethiopia ... ET,BS,ES,EP IBRD Active N {'Name': 'Education for all', 'Percent': 100} [{'name': 'Education for all', 'code': '65'}] 65 130000000 130000000 http://www.worldbank.org/projects/P129828/ethi...
1 {'$oid': '52b213b38594d8a2be17c781'} 2015 November 2013-11-04T00:00:00Z GOVERNMENT OF TUNISIA NaN Republic of Tunisia!$!TN TN Republic of Tunisia Tunisia ... BZ,BS IBRD Active N {'Name': 'Other economic management', 'Percent... [{'name': 'Other economic management', 'code':... 54,24 0 4700000 http://www.worldbank.org/projects/P144674?lang=en
2 {'$oid': '52b213b38594d8a2be17c782'} 2014 November 2013-11-01T00:00:00Z MINISTRY OF FINANCE AND ECONOMIC DEVEL NaN Tuvalu!$!TV TV Tuvalu Tuvalu ... TI IBRD Active Y {'Name': 'Regional integration', 'Percent': 46} [{'name': 'Regional integration', 'code': '47'... 52,81,25,47 6060000 6060000 http://www.worldbank.org/projects/P145310?lang=en
3 {'$oid': '52b213b38594d8a2be17c783'} 2014 October 2013-10-31T00:00:00Z MIN. OF PLANNING AND INT'L COOPERATION NaN Republic of Yemen!$!RY RY Republic of Yemen Yemen, Republic of ... JB IBRD Active N {'Name': 'Participation and civic engagement',... [{'name': 'Participation and civic engagement'... 59,57 0 1500000 http://www.worldbank.org/projects/P144665?lang=en
4 {'$oid': '52b213b38594d8a2be17c784'} 2014 October 2013-10-31T00:00:00Z MINISTRY OF FINANCE 2019-04-30T00:00:00Z Kingdom of Lesotho!$!LS LS Kingdom of Lesotho Lesotho ... FH,YW,YZ IBRD Active N {'Name': 'Export development and competitivene... [{'name': 'Export development and competitiven... 41,45 13100000 13100000 http://www.worldbank.org/projects/P144933/seco...

5 rows × 50 columns


In [107]:
wb.columns


Out[107]:
Index(['_id', 'approvalfy', 'board_approval_month', 'boardapprovaldate',
       'borrower', 'closingdate', 'country_namecode', 'countrycode',
       'countryname', 'countryshortname', 'docty', 'envassesmentcategorycode',
       'grantamt', 'ibrdcommamt', 'id', 'idacommamt', 'impagency',
       'lendinginstr', 'lendinginstrtype', 'lendprojectcost',
       'majorsector_percent', 'mjsector_namecode', 'mjtheme',
       'mjtheme_namecode', 'mjthemecode', 'prodline', 'prodlinetext',
       'productlinetype', 'project_abstract', 'project_name', 'projectdocs',
       'projectfinancialtype', 'projectstatusdisplay', 'regionname', 'sector',
       'sector1', 'sector2', 'sector3', 'sector4', 'sector_namecode',
       'sectorcode', 'source', 'status', 'supplementprojectflg', 'theme1',
       'theme_namecode', 'themecode', 'totalamt', 'totalcommamt', 'url'],
      dtype='object')

In [108]:
type(adult)


Out[108]:
pandas.core.frame.DataFrame

In [109]:
adult.values


Out[109]:
array([[39, ' State-gov', 77516, ..., 40, ' United-States', ' <=50K'],
       [50, ' Self-emp-not-inc', 83311, ..., 13, ' United-States', ' <=50K'],
       [38, ' Private', 215646, ..., 40, ' United-States', ' <=50K'],
       ..., 
       [58, ' Private', 151910, ..., 40, ' United-States', ' <=50K'],
       [22, ' Private', 201490, ..., 20, ' United-States', ' <=50K'],
       [52, ' Self-emp-inc', 287927, ..., 40, ' United-States', ' >50K']], dtype=object)

In [110]:
b=adult.values

In [111]:
type(b)


Out[111]:
numpy.ndarray

In [112]:
len(b)


Out[112]:
32561

In [115]:
np.arange(len(b))


Out[115]:
array([    0,     1,     2, ..., 32558, 32559, 32560])

In [116]:
# Column labels for the DataFrame rebuilt below. They are taken from `adult`
# itself instead of a second hand-typed copy of the same 15 names, so the two
# lists cannot drift apart.
c = list(adult.columns)

In [118]:
b


Out[118]:
array([[39, ' State-gov', 77516, ..., 40, ' United-States', ' <=50K'],
       [50, ' Self-emp-not-inc', 83311, ..., 13, ' United-States', ' <=50K'],
       [38, ' Private', 215646, ..., 40, ' United-States', ' <=50K'],
       ..., 
       [58, ' Private', 151910, ..., 40, ' United-States', ' <=50K'],
       [22, ' Private', 201490, ..., 20, ' United-States', ' <=50K'],
       [52, ' Self-emp-inc', 287927, ..., 40, ' United-States', ' >50K']], dtype=object)

In [119]:
c


Out[119]:
['age ',
 'workclass ',
 'fnlwgt',
 'education ',
 'education-num',
 'marital-status',
 'occupation',
 'relationship',
 'race',
 'sex',
 'capital-gain',
 'capital-loss',
 'hours-per-week',
 'native-country',
 'income']

In [121]:
d=np.arange(len(b))

In [122]:
d


Out[122]:
array([    0,     1,     2, ..., 32558, 32559, 32560])

In [123]:
# Rebuild a DataFrame from its separate parts:
#   b - the 2-D array of cell values,
#   d - the row labels (np.arange(len(b)), i.e. 0 .. len(b)-1),
#   c - the list of column labels.
adult3 = pd.DataFrame(b, index=d, columns=c)

In [124]:
adult3.head()


Out[124]:
age workclass fnlwgt education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country income
0 39 State-gov 77516 Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private 215646 HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 234721 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private 338409 Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K

In [ ]: