notebook.community

Edit and run



In [1]:

    
import numpy as np



In [2]:

    
from random import randint,randrange



In [4]:

    
# Print 40 pseudo-random integers drawn from 0..14 (randrange excludes 15).
# The loop counter itself is never used, so it is named "_".
for _ in range(40):
    print(randrange(15))



In [7]:

    
def mynewfunction(x,y):
    """Print the value of x**2 + 3*x**2*y + 20*y**2.

    The result is only written to standard output; nothing is returned.
    """
    x_squared = x**2
    y_squared = y**2
    print(x_squared + 3*x_squared*y + 20*y_squared)



In [9]:

    
mynewfunction(1,3)



In [11]:

    
mynewfunction(10,30)



In [14]:

    
def mybadfunction(x,y):
    """Return the value of x**2 + 3*x**2*y + 20*y**2."""
    return x**2 + 3*x**2*y + 20*y**2



In [15]:

    
mybadfunction(1,1)









    Out[15]:





24



In [16]:

    
scores=(23,46,69,7,5)



In [17]:

    
type(scores)









    Out[17]:





tuple



In [22]:

    
sc=(46,45)



In [23]:

    
type(sc)









    Out[23]:





tuple



In [24]:

    
scores+sc









    Out[24]:





(23, 46, 69, 7, 5, 46, 45)



In [25]:

    
# Maps a person's name to that person's favourite movie title.
favourite_movie2 = {
    'micky mouse': 'steamboat willie',
    'vijay': 'slumdog millionaire',
    'john': 'passion of christ',
    'donald': 'arthur',
}



In [26]:

    
type(favourite_movie2)









    Out[26]:





dict



In [29]:

    
favourite_movie2['vijay']









    Out[29]:





'slumdog millionaire'



In [30]:

    
import re



In [31]:

    
names=["Agung","Deja", "Brahm","Nathan","Ratna","Naufal","Scholly","Siska","Bintang","Sandra"]



In [32]:

    
# Show the raw result of searching every name for the substring "an":
# a match object where it occurs, None where it does not.
for person in names:
    hit = re.search(r'(an)', person)
    print(hit)









    



None
None
None
<_sre.SRE_Match object; span=(4, 6), match='an'>
None
None
None
None
<_sre.SRE_Match object; span=(4, 6), match='an'>
<_sre.SRE_Match object; span=(1, 3), match='an'>



In [34]:

    
# Same search as a plain yes/no answer: bool() turns a match object into
# True and None into False.
for person in names:
    found = bool(re.search(r'(an)', person))
    print(found)









    



False
False
False
True
False
False
False
False
True
True



In [35]:

    
import re
import numpy as np



In [36]:

    
numlist=["$60000","$80,000","30,000",70000,"55000   "]



In [37]:

    
enumerate?



In [38]:

    
re.sub(r"([$,])","",str("$60000"))









    Out[38]:





'60000'



In [39]:

    
int('60000')









    Out[39]:





60000



In [40]:

    
# Normalise every entry of numlist to an int, in place: remove "$" and ","
# from its string form, then convert. int() tolerates surrounding spaces,
# so '55000   ' converts without extra stripping.
# The whole list is printed after each element so the step-by-step
# clean-up is visible.
for position in range(len(numlist)):
    digits_only = re.sub(r"([$,])", "", str(numlist[position]))
    numlist[position] = int(digits_only)
    print(numlist)









    



[60000, '$80,000', '30,000', 70000, '55000   ']
[60000, 80000, '30,000', 70000, '55000   ']
[60000, 80000, 30000, 70000, '55000   ']
[60000, 80000, 30000, 70000, '55000   ']
[60000, 80000, 30000, 70000, 55000]



In [41]:

    
numlist









    Out[41]:





[60000, 80000, 30000, 70000, 55000]



In [42]:

    
import numpy as np



In [43]:

    
np.mean(numlist)









    Out[43]:





59000.0



In [44]:

    
from datetime import datetime



In [46]:

    
datetime_object = datetime.strptime('June/17/2016 1:33PM', '%B/%d/%Y %I:%M%p')



In [47]:

    
datetime_object









    Out[47]:





datetime.datetime(2016, 6, 17, 13, 33)



In [48]:

    
date_object2=datetime.strptime("12dec-2007","%d%b-%Y")



In [49]:

    
date_object2









    Out[49]:





datetime.datetime(2007, 12, 12, 0, 0)



In [53]:

    
a=date_object2-datetime_object



In [54]:

    
a









    Out[54]:





datetime.timedelta(-3111, 37620)



In [58]:

    
a.days









    Out[58]:





-3111



In [60]:

    
a.days/30









    Out[60]:





-103.7



In [61]:

    
from dateutil import relativedelta



In [67]:

    
# Unary minus flips the sign of the relativedelta built from the two
# datetimes; spaced as "= -" so it is not misread as a compound operator.
r = -relativedelta.relativedelta(date_object2, datetime_object)



In [68]:

    
r.months









    Out[68]:





6



In [69]:

    
r.years









    Out[69]:





8



In [71]:

    
def f(x):
    """Return x**3 + 3*x**2."""
    cube = x**3
    triple_square = 3*x**2
    return cube + triple_square



In [72]:

    
f(1)









    Out[72]:





4



In [73]:

    
type(f)









    Out[73]:





function



In [74]:

    
# Anonymous-function form of the polynomial x**3 + 3*x**2, bound to the name g.
g = lambda x: x**3 + 3*x**2



In [75]:

    
g(10)









    Out[75]:





1300



In [76]:

    
type(g)









    Out[76]:





function



In [77]:

    
import pandas as pd



In [78]:

    
import os as os



In [81]:

    
# Load the diamonds CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path (the file
# sits inside a folder that is itself named "BigDiamonds.csv"); the cell only
# runs where that exact location exists.
diamonds=pd.read_csv("C:\\Users\\KOGENTIX\\Desktop\\training\\BigDiamonds.csv\\BigDiamonds.csv")



In [84]:

    
diamonds.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598024 entries, 0 to 598023
Data columns (total 13 columns):
Unnamed: 0      598024 non-null int64
carat           598024 non-null float64
cut             598024 non-null object
color           598024 non-null object
clarity         598024 non-null object
table           598024 non-null float64
depth           598024 non-null float64
cert            598024 non-null object
measurements    597978 non-null object
price           597311 non-null float64
x               596209 non-null float64
y               596172 non-null float64
z               595480 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 59.3+ MB



In [96]:

    
adult=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",header=None)



In [98]:

    
#pd.read_csv?
'''this is 
a multiple
line comment
'''









    Out[98]:





'this is \na multiple\nline comment\n'



In [99]:

    
adult.head()









    Out[99]:







  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [101]:

    
adult.columns









    Out[101]:





Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], dtype='int64')



In [102]:

    
# Give the 15 positionally-numbered columns of the header-less frame
# readable labels. The labels deliberately carry no leading or trailing
# whitespace, so lookups such as adult["age"] or adult["education"] work
# without having to remember a stray space.
adult.columns = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "income",
]



In [103]:

    
adult.head()









    Out[103]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [105]:

    
# Load the World Bank projects file into a DataFrame; lines=True tells pandas
# to read the file as one JSON object per line.
# NOTE(review): absolute, machine-specific Windows path; the cell only runs
# where this exact location exists.
wb=pd.read_json("C:\\Users\\KOGENTIX\\Desktop\\training\\world_bank.json",lines=True)



In [106]:

    
wb.head()









    Out[106]:







  
    
      
      _id
      approvalfy
      board_approval_month
      boardapprovaldate
      borrower
      closingdate
      country_namecode
      countrycode
      countryname
      countryshortname
      ...
      sectorcode
      source
      status
      supplementprojectflg
      theme1
      theme_namecode
      themecode
      totalamt
      totalcommamt
      url
    
  
  
    
      0
      {'$oid': '52b213b38594d8a2be17c780'}
      1999
      November
      2013-11-12T00:00:00Z
      FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA
      2018-07-07T00:00:00Z
      Federal Democratic Republic of Ethiopia!$!ET
      ET
      Federal Democratic Republic of Ethiopia
      Ethiopia
      ...
      ET,BS,ES,EP
      IBRD
      Active
      N
      {'Name': 'Education for all', 'Percent': 100}
      [{'name': 'Education for all', 'code': '65'}]
      65
      130000000
      130000000
      http://www.worldbank.org/projects/P129828/ethi...
    
    
      1
      {'$oid': '52b213b38594d8a2be17c781'}
      2015
      November
      2013-11-04T00:00:00Z
      GOVERNMENT OF TUNISIA
      NaN
      Republic of Tunisia!$!TN
      TN
      Republic of Tunisia
      Tunisia
      ...
      BZ,BS
      IBRD
      Active
      N
      {'Name': 'Other economic management', 'Percent...
      [{'name': 'Other economic management', 'code':...
      54,24
      0
      4700000
      http://www.worldbank.org/projects/P144674?lang=en
    
    
      2
      {'$oid': '52b213b38594d8a2be17c782'}
      2014
      November
      2013-11-01T00:00:00Z
      MINISTRY OF FINANCE AND ECONOMIC DEVEL
      NaN
      Tuvalu!$!TV
      TV
      Tuvalu
      Tuvalu
      ...
      TI
      IBRD
      Active
      Y
      {'Name': 'Regional integration', 'Percent': 46}
      [{'name': 'Regional integration', 'code': '47'...
      52,81,25,47
      6060000
      6060000
      http://www.worldbank.org/projects/P145310?lang=en
    
    
      3
      {'$oid': '52b213b38594d8a2be17c783'}
      2014
      October
      2013-10-31T00:00:00Z
      MIN. OF PLANNING AND INT'L COOPERATION
      NaN
      Republic of Yemen!$!RY
      RY
      Republic of Yemen
      Yemen, Republic of
      ...
      JB
      IBRD
      Active
      N
      {'Name': 'Participation and civic engagement',...
      [{'name': 'Participation and civic engagement'...
      59,57
      0
      1500000
      http://www.worldbank.org/projects/P144665?lang=en
    
    
      4
      {'$oid': '52b213b38594d8a2be17c784'}
      2014
      October
      2013-10-31T00:00:00Z
      MINISTRY OF FINANCE
      2019-04-30T00:00:00Z
      Kingdom of Lesotho!$!LS
      LS
      Kingdom of Lesotho
      Lesotho
      ...
      FH,YW,YZ
      IBRD
      Active
      N
      {'Name': 'Export development and competitivene...
      [{'name': 'Export development and competitiven...
      41,45
      13100000
      13100000
      http://www.worldbank.org/projects/P144933/seco...
    
  

5 rows × 50 columns



In [107]:

    
wb.columns









    Out[107]:





Index(['_id', 'approvalfy', 'board_approval_month', 'boardapprovaldate',
       'borrower', 'closingdate', 'country_namecode', 'countrycode',
       'countryname', 'countryshortname', 'docty', 'envassesmentcategorycode',
       'grantamt', 'ibrdcommamt', 'id', 'idacommamt', 'impagency',
       'lendinginstr', 'lendinginstrtype', 'lendprojectcost',
       'majorsector_percent', 'mjsector_namecode', 'mjtheme',
       'mjtheme_namecode', 'mjthemecode', 'prodline', 'prodlinetext',
       'productlinetype', 'project_abstract', 'project_name', 'projectdocs',
       'projectfinancialtype', 'projectstatusdisplay', 'regionname', 'sector',
       'sector1', 'sector2', 'sector3', 'sector4', 'sector_namecode',
       'sectorcode', 'source', 'status', 'supplementprojectflg', 'theme1',
       'theme_namecode', 'themecode', 'totalamt', 'totalcommamt', 'url'],
      dtype='object')



In [108]:

    
type(adult)









    Out[108]:





pandas.core.frame.DataFrame



In [109]:

    
adult.values









    Out[109]:





array([[39, ' State-gov', 77516, ..., 40, ' United-States', ' <=50K'],
       [50, ' Self-emp-not-inc', 83311, ..., 13, ' United-States', ' <=50K'],
       [38, ' Private', 215646, ..., 40, ' United-States', ' <=50K'],
       ..., 
       [58, ' Private', 151910, ..., 40, ' United-States', ' <=50K'],
       [22, ' Private', 201490, ..., 20, ' United-States', ' <=50K'],
       [52, ' Self-emp-inc', 287927, ..., 40, ' United-States', ' >50K']], dtype=object)



In [110]:

    
b=adult.values



In [111]:

    
type(b)









    Out[111]:





numpy.ndarray



In [112]:

    
len(b)









    Out[112]:





32561



In [115]:

    
np.arange(len(b))









    Out[115]:





array([    0,     1,     2, ..., 32558, 32559, 32560])



In [116]:

    
# Column labels used when rebuilding the frame from its raw values.
# NOTE(review): "age ", "workclass " and "education " end with a space;
# they are kept verbatim here.
c = [
    "age ", "workclass ", "fnlwgt", "education ", "education-num",
    "marital-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country",
    "income",
]



In [118]:

    
b









    Out[118]:





array([[39, ' State-gov', 77516, ..., 40, ' United-States', ' <=50K'],
       [50, ' Self-emp-not-inc', 83311, ..., 13, ' United-States', ' <=50K'],
       [38, ' Private', 215646, ..., 40, ' United-States', ' <=50K'],
       ..., 
       [58, ' Private', 151910, ..., 40, ' United-States', ' <=50K'],
       [22, ' Private', 201490, ..., 20, ' United-States', ' <=50K'],
       [52, ' Self-emp-inc', 287927, ..., 40, ' United-States', ' >50K']], dtype=object)



In [119]:

    
c









    Out[119]:





['age ',
 'workclass ',
 'fnlwgt',
 'education ',
 'education-num',
 'marital-status',
 'occupation',
 'relationship',
 'race',
 'sex',
 'capital-gain',
 'capital-loss',
 'hours-per-week',
 'native-country',
 'income']



In [121]:

    
d=np.arange(len(b))



In [122]:

    
d









    Out[122]:





array([    0,     1,     2, ..., 32558, 32559, 32560])



In [123]:

    
# Reassemble a DataFrame from its separate parts:
#   b - the cell values taken from adult.values
#   d - the row index, np.arange(len(b))
#   c - the list of column labels
adult3 = pd.DataFrame(b, index=d, columns=c)



In [124]:

    
adult3.head()









    Out[124]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [ ]:

	0	1	2	3	4	5	6	7	8	9	10	12	13	14
0	39	State-gov	77516	Bachelors	13	Never-married	Adm-clerical	Not-in-family	White	Male	2174	40	United-States	<=50K
1	50	Self-emp-not-inc	83311	Bachelors	13	Married-civ-spouse	Exec-managerial	Husband	White	Male	0	13	United-States	<=50K
2	38	Private	215646	HS-grad	9	Divorced	Handlers-cleaners	Not-in-family	White	Male	0	40	United-States	<=50K
3	53	Private	234721	11th	7	Married-civ-spouse	Handlers-cleaners	Husband	Black	Male	0	40	United-States	<=50K
4	28	Private	338409	Bachelors	13	Married-civ-spouse	Prof-specialty	Wife	Black	Female	0	40	Cuba	<=50K

	_id	approvalfy	board_approval_month	boardapprovaldate	borrower	closingdate	country_namecode	countrycode	countryname	countryshortname	...	sectorcode	source	status	supplementprojectflg	theme1	theme_namecode	themecode	totalamt	totalcommamt	url
0	{'$oid': '52b213b38594d8a2be17c780'}	1999	November	2013-11-12T00:00:00Z	FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA	2018-07-07T00:00:00Z	Federal Democratic Republic of Ethiopia!$!ET	ET	Federal Democratic Republic of Ethiopia	Ethiopia	...	ET,BS,ES,EP	IBRD	Active	N	{'Name': 'Education for all', 'Percent': 100}	[{'name': 'Education for all', 'code': '65'}]	65	130000000	130000000	http://www.worldbank.org/projects/P129828/ethi...
1	{'$oid': '52b213b38594d8a2be17c781'}	2015	November	2013-11-04T00:00:00Z	GOVERNMENT OF TUNISIA	NaN	Republic of Tunisia!$!TN	TN	Republic of Tunisia	Tunisia	...	BZ,BS	IBRD	Active	N	{'Name': 'Other economic management', 'Percent...	[{'name': 'Other economic management', 'code':...	54,24	0	4700000	http://www.worldbank.org/projects/P144674?lang=en
2	{'$oid': '52b213b38594d8a2be17c782'}	2014	November	2013-11-01T00:00:00Z	MINISTRY OF FINANCE AND ECONOMIC DEVEL	NaN	Tuvalu!$!TV	TV	Tuvalu	Tuvalu	...	TI	IBRD	Active	Y	{'Name': 'Regional integration', 'Percent': 46}	[{'name': 'Regional integration', 'code': '47'...	52,81,25,47	6060000	6060000	http://www.worldbank.org/projects/P145310?lang=en
3	{'$oid': '52b213b38594d8a2be17c783'}	2014	October	2013-10-31T00:00:00Z	MIN. OF PLANNING AND INT'L COOPERATION	NaN	Republic of Yemen!$!RY	RY	Republic of Yemen	Yemen, Republic of	...	JB	IBRD	Active	N	{'Name': 'Participation and civic engagement',...	[{'name': 'Participation and civic engagement'...	59,57	0	1500000	http://www.worldbank.org/projects/P144665?lang=en
4	{'$oid': '52b213b38594d8a2be17c784'}	2014	October	2013-10-31T00:00:00Z	MINISTRY OF FINANCE	2019-04-30T00:00:00Z	Kingdom of Lesotho!$!LS	LS	Kingdom of Lesotho	Lesotho	...	FH,YW,YZ	IBRD	Active	N	{'Name': 'Export development and competitivene...	[{'name': 'Export development and competitiven...	41,45	13100000	13100000	http://www.worldbank.org/projects/P144933/seco...