Note: you can find my iPython Notebook for Dataset 1 here -> https://github.com/M0nica/2016-new-coder-survey
In [1]:
# workon dataanalysis - my virtual environment
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/matplotlib/__init__.py:1035: UserWarning: Duplicate key in file "/Users/Monica/.matplotlib/matplotlibrc", line #2
(fname, cnt))
In [2]:
# df = pd.read_table('34933-0001-Data.tsv')
odf = pd.read_csv('accreditation_2016_03.csv')
In [3]:
odf.head()
Out[3]:
Institution_ID
Institution_Name
Institution_Address
Institution_City
Institution_State
Institution_Zip
Institution_Phone
Institution_OPEID
Institution_IPEDS_UnitID
Institution_Web_Address
...
Campus_Zip
Campus_IPEDS_UnitID
Accreditation_Type
Agency_Name
Agency_Status
Program_Name
Accreditation_Status
Accreditation_Date_Type
Periods
Last Action
0
100016
Community College of the Air Force
130 W Maxwell Blvd
Montgomery
AL
"36112-6613"
334-953-6436
"01230800"
100636.0
www.maxwell.af.mil/au/ccaf/
...
"76311-2263"
NaN
Specialized
American Physical Therapy Association, Commiss...
NaN
Physical Therapy (PTA) - Programs for the phys...
Accredited
Actual
12/01/1976 - Current
NaN
1
100016
Community College of the Air Force
130 W Maxwell Blvd
Montgomery
AL
"36112-6613"
334-953-6436
"01230800"
100636.0
www.maxwell.af.mil/au/ccaf/
...
NaN
NaN
Institutional
Southern Association of Colleges and Schools, ...
NaN
NaN
Accredited
Estimated
07/01/1980 - Current
NaN
2
100025
Alabama A & M University
4900 Meridian St
Normal
AL
"35762"
256-372-5000
"00100200"
100654.0
www.aamu.edu/
...
NaN
NaN
Institutional
Southern Association of Colleges and Schools, ...
NaN
NaN
Accredited
Estimated
07/01/1963 - Current
NaN
3
100025
Alabama A & M University
4900 Meridian St
Normal
AL
"35762"
256-372-5000
"00100200"
100654.0
www.aamu.edu/
...
NaN
NaN
Specialized
Academy of Nutrition and Dietetics, Accreditat...
NaN
Didactic Program in Dietetics
Accredited
Actual
12/01/1979 - Current
Probation
4
100025
Alabama A & M University
4900 Meridian St
Normal
AL
"35762"
256-372-5000
"00100200"
100654.0
www.aamu.edu/
...
NaN
NaN
Specialized
American Speech-Language-Hearing Association, ...
NaN
Speech-Language Pathology (SLP) - Graduate deg...
Accredited
Actual
10/01/1994 - Current
NaN
5 rows × 25 columns
In [12]:
odf.columns
Out[12]:
Index(['Institution_ID', 'Institution_Name', 'Institution_Address',
'Institution_City', 'Institution_State', 'Institution_Zip',
'Institution_Phone', 'Institution_OPEID', 'Institution_IPEDS_UnitID',
'Institution_Web_Address', 'Campus_ID', 'Campus_Name', 'Campus_Address',
'Campus_City', 'Campus_State', 'Campus_Zip', 'Campus_IPEDS_UnitID',
'Accreditation_Type', 'Agency_Name', 'Agency_Status', 'Program_Name',
'Accreditation_Status', 'Accreditation_Date_Type', 'Periods',
'Last Action'],
dtype='object')
In [13]:
odf['Campus_City'].value_counts().head(10)
Out[13]:
Chicago 283
Philadelphia 167
Phoenix 156
Indianapolis 134
Houston 123
New York 105
Kansas City 95
Columbus 95
Springfield 93
Miami 92
Name: Campus_City, dtype: int64
In [14]:
top_cities = odf['Campus_City'].value_counts().head(10).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
top_cities.set_title('Top 10 College Cities (By Number of Colleges in State)')
top_cities.set_xlabel('City')
top_cities.set_ylabel('# of Colleges')
plt.savefig('topcollegecities.png')
In [15]:
top_cities = odf['Campus_State'].value_counts().head(10).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
top_cities.set_title('Top 10 College States (By Number of Campuses in State)')
top_cities.set_xlabel('City')
top_cities.set_ylabel('# of Colleges')
plt.savefig('topcollegecities.png')
In [16]:
odf['Accreditation_Status'].value_counts()
Out[16]:
Accredited 38695
Pre-Accredited 1302
Name: Accreditation_Status, dtype: int64
In [4]:
df = pd.read_csv('Full Results - Stack Overflow Developer Survey - 2015 2.csv', encoding ='mac_roman')
/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (5,108,121,196,197,198) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [26]:
df.head()
Out[26]:
Country
Age
Gender
Tabs or Spaces
Years IT / Programming Experience
Occupation
Desktop Operating System
Desktop Operating System: write-in
Current Lang & Tech: Android
Current Lang & Tech: Arduino
...
Why use Stack Overflow: I don't use Stack Overflow
How often are Stack Overflow's answers helpful
Why answer: Help a programmer in need
Why answer: Help future programmers
Why answer: Demonstrate expertise
Why answer: Self promotion
Why answer: Sense of responsibility to developers
Why answer: No idea
Why answer: I don't answer and I don't want to
Why answer: I don't answer but I want to
0
Croatia
25-29
Male
Tabs
2 - 5 years
Back-end web developer
Ubuntu
NaN
NaN
NaN
...
NaN
Usually
NaN
NaN
It feels good to demonstrate my expertise.
Demonstrating my expertise will benefit me
I feel a sense of responsibility to the develo...
NaN
NaN
NaN
1
France
20-24
Male
Spaces
1 - 2 years
Back-end web developer
Windows 7
NaN
NaN
NaN
...
NaN
Usually
NaN
My answer will help lots of people who have th...
It feels good to demonstrate my expertise.
NaN
NaN
NaN
NaN
NaN
2
India
20-24
Male
Tabs
1 - 2 years
Back-end web developer
Windows 7
NaN
NaN
NaN
...
NaN
Rarely
NaN
NaN
NaN
Demonstrating my expertise will benefit me
NaN
NaN
NaN
NaN
3
Latvia
25-29
Male
It depends
6 - 10 years
Back-end web developer
Ubuntu
NaN
NaN
NaN
...
NaN
Usually
It feels good to help a programmer in need
My answer will help lots of people who have th...
It feels good to demonstrate my expertise.
Demonstrating my expertise will benefit me
I feel a sense of responsibility to the develo...
NaN
NaN
NaN
4
Norway
30-34
Male
Tabs
2 - 5 years
Back-end web developer
Windows 8
NaN
NaN
NaN
...
NaN
Usually
It feels good to help a programmer in need
NaN
NaN
Demonstrating my expertise will benefit me
NaN
NaN
NaN
I don't answer much (or at all), but I want to...
5 rows × 222 columns
In [5]:
df.columns
Out[5]:
Index(['Country', 'Age', 'Gender', 'Tabs or Spaces',
'Years IT / Programming Experience', 'Occupation',
'Desktop Operating System', 'Desktop Operating System: write-in',
'Current Lang & Tech: Android', 'Current Lang & Tech: Arduino',
...
'Why use Stack Overflow: I don't use Stack Overflow',
'How often are Stack Overflow's answers helpful',
'Why answer: Help a programmer in need',
'Why answer: Help future programmers',
'Why answer: Demonstrate expertise', 'Why answer: Self promotion',
'Why answer: Sense of responsibility to developers',
'Why answer: No idea', 'Why answer: I don't answer and I don't want to',
'Why answer: I don't answer but I want to'],
dtype='object', length=222)
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26086 entries, 0 to 26085
Columns: 222 entries, Country to Why answer: I don't answer but I want to
dtypes: object(222)
memory usage: 44.2+ MB
In [7]:
df['Age'].value_counts().head(10).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x119c8d2e8>
In [44]:
df['Industry'].value_counts().head(10).plot(kind="barh", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x1171d8cf8>
In [38]:
df['Preferred text editor'].value_counts().head(10).plot(kind="barh", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x115be3898>
In [ ]:
df['Preferred text editor'].value_counts().head(10).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
In [47]:
#df['Training & Education: BS in CS'].value_counts().head(10).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
In [107]:
df['Occupation'].value_counts()
Out[107]:
Full-stack web developer 6765
Student 2845
Back-end web developer 2104
Desktop developer 1735
Front-end web developer 1242
Mobile developer - Android 847
Mobile developer - iOS 634
Embedded application developer 609
Enterprise level services developer 599
Developer with a statistics or mathematics background 464
Data scientist 409
Executive (VP of Eng., CTO, CIO, etc.) 376
Mobile developer 376
System administrator 344
DevOps 322
Business intelligence or data warehousing expert 179
Quality Assurance 164
Graphics programmer 164
Product manager 157
Designer 150
Machine learning developer 145
Database administrator 119
Growth hacker 59
Mobile developer - Windows Phone 43
Name: Occupation, dtype: int64
In [105]:
df.groupby('Gender')['Occupation'].value_counts().plot(kind="bar", color = ['#599ad3', '#f9a65a']) # too mmuch data to appropriately display
Out[105]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ceab0b8>
In [143]:
#df.groupby('Gender')['Years IT / Programming Experience'].value_counts()
#i = ["Male", "Female"]
gender_df = df[(df['Gender'] == 'Male') | (df['Gender'] == 'Female')]
print(gender_df['Gender'].value_counts())
#gender_df.groupby('Gender')['Years IT / Programming Experience'].value_counts().plot(kind="bar", color = ['#599ad3', '#f9a65a'])
Male 23699
Female 1480
Name: Gender, dtype: int64
In [144]:
gender_df.groupby('Gender')['Occupation'].value_counts()
gender_df = gender_df[gender_df['Occupation'] == "Full-stack web developer"]
gender_df.groupby('Gender')['Occupation'].value_counts().plot(kind="bar", color = ['#599ad3', '#f9a65a'])
Out[144]:
<matplotlib.axes._subplots.AxesSubplot at 0x117a23748>
In [145]:
gender_df.groupby('Gender')['Years IT / Programming Experience'].value_counts().head(10).plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
Out[145]:
<matplotlib.axes._subplots.AxesSubplot at 0x117a8df98>
In [9]:
df['Age'].value_counts()
Out[9]:
25-29 7365
20-24 6339
30-34 4593
35-39 2357
< 20 2261
40-50 1965
51-60 488
Prefer not to disclose 339
> 60 124
Name: Age, dtype: int64
In [116]:
df.groupby('Gender')['Age'].value_counts().plot(kind="bar", color = ['#624ea7', '#599ad3', '#f9a65a', '#9e66ab', 'purple'])
Out[116]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d495198>
In [88]:
df["AgeScale"] = df["Age"].apply(str).replace("< 20", "0").apply(str).replace("20-24", "1").apply(str).replace("25-29", "2").apply(str).replace("30-34", "3").apply(str).replace("30-34", "3").apply(str).replace("35-39", "4").apply(str).replace("40-50", "5").apply(str).replace("51-60", "6").apply(str).replace("> 60", "7")
In [89]:
print(df["AgeScale"].head(10))
0 2
1 1
2 1
3 2
4 3
5 4
6 0
7 2
8 3
9 4
Name: AgeScale, dtype: object
In [90]:
years_df =df[df['AgeScale'] != "Prefer not to disclose"]
years_df['AgeScale'] = years_df['AgeScale'].astype(float)
/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
In [91]:
print(years_df).head()
Country Age Gender Tabs or Spaces \
0 Croatia 25-29 Male Tabs
1 France 20-24 Male Spaces
2 India 20-24 Male Tabs
3 Latvia 25-29 Male It depends
4 Norway 30-34 Male Tabs
5 United States 35-39 Male Tabs
6 NaN < 20 Male Tabs
7 Argentina 25-29 Male Spaces
8 Australia 30-34 Male Tabs
9 Belgium 35-39 Male Tabs
10 Belgium 35-39 Male Tabs
11 Belgium 25-29 Male Tabs
12 Brazil 40-50 Male Tabs
13 Brazil 25-29 Male It depends
14 Brazil 20-24 Male Tabs
15 Canada 25-29 Male Tabs
16 Canada 25-29 Male It depends
17 Canada 30-34 Male Tabs
18 Canada 20-24 Male Spaces
19 Canada 35-39 Male Tabs
20 Canada 30-34 Male Spaces
21 China 25-29 Male It depends
22 Denmark 30-34 Male It depends
23 Denmark 25-29 Male Tabs
24 Denmark 30-34 Male Spaces
25 Denmark 35-39 Male It depends
26 Egypt 25-29 Male Tabs
27 Finland 30-34 Prefer not to disclose Spaces
28 France 25-29 Male Tabs
29 France 30-34 Male Tabs
... ... ... ... ...
26054 United States 25-29 Male Spaces
26055 United States 20-24 Male It depends
26056 United States > 60 Other Spaces
26057 United States 30-34 Male Huh?
26059 United States 40-50 Male It depends
26060 United States 30-34 Male Tabs
26061 Uruguay 30-34 Male It depends
26062 Uruguay 25-29 Male Tabs
26063 Uruguay 20-24 Female Huh?
26064 Vanuatu 30-34 Male Tabs
26065 Venezuela 20-24 Male Tabs
26066 Vietnam 20-24 Male Tabs
26067 Vietnam 25-29 Male Tabs
26068 Vietnam 25-29 Male Tabs
26069 Vietnam 30-34 Male Spaces
26070 Vietnam 25-29 Male Huh?
26071 Vietnam 20-24 Male Tabs
26072 Vietnam 20-24 Male Tabs
26073 Vietnam 25-29 Male Spaces
26074 Vietnam 20-24 Male It depends
26075 Vietnam 25-29 Male Tabs
26076 Vietnam 20-24 Male Tabs
26077 Vietnam 20-24 Male It depends
26078 Vietnam 20-24 Male Spaces
26079 Vietnam NaN NaN NaN
26080 Vietnam < 20 Prefer not to disclose Huh?
26081 Vietnam 20-24 Male Tabs
26082 Vietnam 25-29 Male Tabs
26083 Vietnam 25-29 Male It depends
26084 Zimbabwe NaN NaN Huh?
Years IT / Programming Experience Occupation \
0 2 - 5 years Back-end web developer
1 1 - 2 years Back-end web developer
2 1 - 2 years Back-end web developer
3 6 - 10 years Back-end web developer
4 2 - 5 years Back-end web developer
5 11+ years Back-end web developer
6 1 - 2 years Back-end web developer
7 6 - 10 years Back-end web developer
8 6 - 10 years Back-end web developer
9 11+ years Back-end web developer
10 11+ years Back-end web developer
11 2 - 5 years Back-end web developer
12 11+ years Back-end web developer
13 2 - 5 years Back-end web developer
14 2 - 5 years Back-end web developer
15 6 - 10 years Back-end web developer
16 6 - 10 years Back-end web developer
17 2 - 5 years Back-end web developer
18 1 - 2 years Back-end web developer
19 11+ years Back-end web developer
20 6 - 10 years Back-end web developer
21 2 - 5 years Back-end web developer
22 2 - 5 years Back-end web developer
23 2 - 5 years Back-end web developer
24 6 - 10 years Back-end web developer
25 11+ years Back-end web developer
26 2 - 5 years Back-end web developer
27 6 - 10 years Back-end web developer
28 Less than 1 year Back-end web developer
29 6 - 10 years Back-end web developer
... ... ...
26054 6 - 10 years NaN
26055 2 - 5 years NaN
26056 11+ years NaN
26057 6 - 10 years NaN
26059 11+ years NaN
26060 6 - 10 years NaN
26061 6 - 10 years NaN
26062 1 - 2 years NaN
26063 Less than 1 year NaN
26064 NaN NaN
26065 1 - 2 years NaN
26066 NaN NaN
26067 NaN NaN
26068 6 - 10 years NaN
26069 6 - 10 years NaN
26070 2 - 5 years NaN
26071 1 - 2 years NaN
26072 1 - 2 years NaN
26073 NaN NaN
26074 Less than 1 year NaN
26075 2 - 5 years NaN
26076 1 - 2 years NaN
26077 2 - 5 years NaN
26078 NaN NaN
26079 2 - 5 years NaN
26080 NaN NaN
26081 1 - 2 years NaN
26082 NaN NaN
26083 2 - 5 years NaN
26084 NaN NaN
Desktop Operating System Desktop Operating System: write-in \
0 Ubuntu NaN
1 Windows 7 NaN
2 Windows 7 NaN
3 Ubuntu NaN
4 Windows 8 NaN
5 Mac OS X NaN
6 Other Linux NaN
7 Mac OS X NaN
8 Windows 8 NaN
9 Windows 8 NaN
10 Mac OS X NaN
11 Windows 8 NaN
12 Windows 8 NaN
13 Debian NaN
14 Windows 8 NaN
15 Windows 8 NaN
16 Windows 7 windows server 2008 r2
17 Windows 7 NaN
18 Windows 8 NaN
19 Windows 7 NaN
20 Windows 7 NaN
21 Windows 7 NaN
22 NaN Windows Server 2008R2
23 Windows 7 NaN
24 Fedora NaN
25 Windows 8 NaN
26 Mint NaN
27 Windows 7 NaN
28 Windows 8 NaN
29 Windows 7 NaN
... ... ...
26054 Mac OS X NaN
26055 NaN NaN
26056 NaN NaN
26057 NaN NaN
26059 Windows 7 NaN
26060 NaN NaN
26061 NaN NaN
26062 NaN NaN
26063 NaN NaN
26064 NaN NaN
26065 NaN NaN
26066 NaN NaN
26067 NaN NaN
26068 NaN NaN
26069 NaN NaN
26070 NaN NaN
26071 Windows 8 NaN
26072 NaN NaN
26073 NaN NaN
26074 NaN NaN
26075 NaN NaN
26076 NaN NaN
26077 NaN NaN
26078 NaN NaN
26079 NaN NaN
26080 NaN NaN
26081 NaN NaN
26082 NaN NaN
26083 NaN NaN
26084 NaN NaN
Current Lang & Tech: Android Current Lang & Tech: Arduino ... \
0 NaN NaN ...
1 NaN NaN ...
2 NaN NaN ...
3 NaN NaN ...
4 NaN NaN ...
5 NaN NaN ...
6 NaN NaN ...
7 NaN NaN ...
8 NaN NaN ...
9 Android Arduino / Raspberry Pi ...
10 Android Arduino / Raspberry Pi ...
11 NaN NaN ...
12 NaN NaN ...
13 NaN NaN ...
14 NaN NaN ...
15 NaN NaN ...
16 NaN NaN ...
17 NaN NaN ...
18 NaN NaN ...
19 NaN NaN ...
20 NaN NaN ...
21 NaN NaN ...
22 NaN NaN ...
23 NaN NaN ...
24 NaN NaN ...
25 NaN NaN ...
26 NaN NaN ...
27 NaN NaN ...
28 NaN NaN ...
29 NaN NaN ...
... ... ... ...
26054 NaN NaN ...
26055 NaN NaN ...
26056 NaN NaN ...
26057 NaN NaN ...
26059 NaN NaN ...
26060 NaN NaN ...
26061 NaN NaN ...
26062 NaN NaN ...
26063 NaN NaN ...
26064 NaN NaN ...
26065 NaN NaN ...
26066 NaN NaN ...
26067 NaN NaN ...
26068 NaN NaN ...
26069 NaN NaN ...
26070 NaN NaN ...
26071 Android NaN ...
26072 NaN NaN ...
26073 NaN NaN ...
26074 NaN NaN ...
26075 NaN NaN ...
26076 NaN NaN ...
26077 NaN NaN ...
26078 NaN NaN ...
26079 NaN NaN ...
26080 NaN NaN ...
26081 NaN NaN ...
26082 NaN NaN ...
26083 NaN NaN ...
26084 NaN NaN ...
How often are Stack Overflow's answers helpful \
0 Usually
1 Usually
2 Rarely
3 Usually
4 Usually
5 Sometimes
6 NaN
7 Usually
8 Usually
9 NaN
10 Always
11 Usually
12 Usually
13 Usually
14 Usually
15 Usually
16 Sometimes
17 Always
18 NaN
19 Usually
20 Sometimes
21 NaN
22 Usually
23 Usually
24 Usually
25 Usually
26 NaN
27 Usually
28 Always
29 Usually
... ...
26054 Sometimes
26055 NaN
26056 NaN
26057 NaN
26059 NaN
26060 NaN
26061 NaN
26062 NaN
26063 NaN
26064 NaN
26065 NaN
26066 NaN
26067 NaN
26068 NaN
26069 NaN
26070 NaN
26071 NaN
26072 NaN
26073 NaN
26074 NaN
26075 NaN
26076 NaN
26077 NaN
26078 NaN
26079 NaN
26080 NaN
26081 NaN
26082 NaN
26083 NaN
26084 NaN
Why answer: Help a programmer in need \
0 NaN
1 NaN
2 NaN
3 It feels good to help a programmer in need
4 It feels good to help a programmer in need
5 It feels good to help a programmer in need
6 NaN
7 It feels good to help a programmer in need
8 It feels good to help a programmer in need
9 NaN
10 It feels good to help a programmer in need
11 It feels good to help a programmer in need
12 It feels good to help a programmer in need
13 It feels good to help a programmer in need
14 It feels good to help a programmer in need
15 It feels good to help a programmer in need
16 It feels good to help a programmer in need
17 NaN
18 NaN
19 NaN
20 NaN
21 NaN
22 It feels good to help a programmer in need
23 It feels good to help a programmer in need
24 It feels good to help a programmer in need
25 NaN
26 NaN
27 It feels good to help a programmer in need
28 It feels good to help a programmer in need
29 It feels good to help a programmer in need
... ...
26054 It feels good to help a programmer in need
26055 NaN
26056 NaN
26057 NaN
26059 NaN
26060 NaN
26061 NaN
26062 NaN
26063 NaN
26064 NaN
26065 NaN
26066 NaN
26067 NaN
26068 NaN
26069 NaN
26070 NaN
26071 NaN
26072 NaN
26073 NaN
26074 NaN
26075 NaN
26076 NaN
26077 NaN
26078 NaN
26079 NaN
26080 NaN
26081 NaN
26082 NaN
26083 NaN
26084 NaN
Why answer: Help future programmers \
0 NaN
1 My answer will help lots of people who have th...
2 NaN
3 My answer will help lots of people who have th...
4 NaN
5 My answer will help lots of people who have th...
6 NaN
7 My answer will help lots of people who have th...
8 My answer will help lots of people who have th...
9 NaN
10 NaN
11 NaN
12 NaN
13 My answer will help lots of people who have th...
14 My answer will help lots of people who have th...
15 My answer will help lots of people who have th...
16 NaN
17 My answer will help lots of people who have th...
18 NaN
19 NaN
20 My answer will help lots of people who have th...
21 NaN
22 NaN
23 My answer will help lots of people who have th...
24 My answer will help lots of people who have th...
25 NaN
26 NaN
27 My answer will help lots of people who have th...
28 NaN
29 NaN
... ...
26054 My answer will help lots of people who have th...
26055 NaN
26056 NaN
26057 NaN
26059 NaN
26060 NaN
26061 NaN
26062 NaN
26063 NaN
26064 NaN
26065 NaN
26066 NaN
26067 NaN
26068 NaN
26069 NaN
26070 NaN
26071 NaN
26072 NaN
26073 NaN
26074 NaN
26075 NaN
26076 NaN
26077 NaN
26078 NaN
26079 NaN
26080 NaN
26081 NaN
26082 NaN
26083 NaN
26084 NaN
Why answer: Demonstrate expertise \
0 It feels good to demonstrate my expertise.
1 It feels good to demonstrate my expertise.
2 NaN
3 It feels good to demonstrate my expertise.
4 NaN
5 NaN
6 NaN
7 It feels good to demonstrate my expertise.
8 It feels good to demonstrate my expertise.
9 NaN
10 NaN
11 It feels good to demonstrate my expertise.
12 NaN
13 NaN
14 NaN
15 NaN
16 NaN
17 NaN
18 NaN
19 NaN
20 NaN
21 NaN
22 It feels good to demonstrate my expertise.
23 NaN
24 It feels good to demonstrate my expertise.
25 NaN
26 NaN
27 NaN
28 NaN
29 NaN
... ...
26054 It feels good to demonstrate my expertise.
26055 NaN
26056 NaN
26057 NaN
26059 NaN
26060 NaN
26061 NaN
26062 NaN
26063 NaN
26064 NaN
26065 NaN
26066 NaN
26067 NaN
26068 NaN
26069 NaN
26070 NaN
26071 NaN
26072 NaN
26073 NaN
26074 NaN
26075 NaN
26076 NaN
26077 NaN
26078 NaN
26079 NaN
26080 NaN
26081 NaN
26082 NaN
26083 NaN
26084 NaN
Why answer: Self promotion \
0 Demonstrating my expertise will benefit me
1 NaN
2 Demonstrating my expertise will benefit me
3 Demonstrating my expertise will benefit me
4 Demonstrating my expertise will benefit me
5 NaN
6 NaN
7 Demonstrating my expertise will benefit me
8 NaN
9 NaN
10 NaN
11 Demonstrating my expertise will benefit me
12 NaN
13 NaN
14 NaN
15 NaN
16 NaN
17 NaN
18 NaN
19 NaN
20 Demonstrating my expertise will benefit me
21 NaN
22 NaN
23 NaN
24 NaN
25 NaN
26 NaN
27 NaN
28 NaN
29 NaN
... ...
26054 Demonstrating my expertise will benefit me
26055 NaN
26056 NaN
26057 NaN
26059 NaN
26060 NaN
26061 NaN
26062 NaN
26063 NaN
26064 NaN
26065 NaN
26066 NaN
26067 NaN
26068 NaN
26069 NaN
26070 NaN
26071 NaN
26072 NaN
26073 NaN
26074 NaN
26075 NaN
26076 NaN
26077 NaN
26078 NaN
26079 NaN
26080 NaN
26081 NaN
26082 NaN
26083 NaN
26084 NaN
Why answer: Sense of responsibility to developers Why answer: No idea \
0 I feel a sense of responsibility to the develo... NaN
1 NaN NaN
2 NaN NaN
3 I feel a sense of responsibility to the develo... NaN
4 NaN NaN
5 I feel a sense of responsibility to the develo... NaN
6 NaN NaN
7 I feel a sense of responsibility to the develo... NaN
8 NaN NaN
9 NaN NaN
10 NaN NaN
11 NaN NaN
12 NaN NaN
13 NaN NaN
14 NaN NaN
15 I feel a sense of responsibility to the develo... NaN
16 NaN NaN
17 I feel a sense of responsibility to the develo... NaN
18 NaN NaN
19 NaN NaN
20 I feel a sense of responsibility to the develo... NaN
21 NaN NaN
22 NaN NaN
23 NaN NaN
24 NaN NaN
25 NaN NaN
26 NaN NaN
27 I feel a sense of responsibility to the develo... NaN
28 I feel a sense of responsibility to the develo... NaN
29 NaN NaN
... ... ...
26054 I feel a sense of responsibility to the develo... NaN
26055 NaN NaN
26056 NaN NaN
26057 NaN NaN
26059 NaN NaN
26060 NaN NaN
26061 NaN NaN
26062 NaN NaN
26063 NaN NaN
26064 NaN NaN
26065 NaN NaN
26066 NaN NaN
26067 NaN NaN
26068 NaN NaN
26069 NaN NaN
26070 NaN NaN
26071 NaN NaN
26072 NaN NaN
26073 NaN NaN
26074 NaN NaN
26075 NaN NaN
26076 NaN NaN
26077 NaN NaN
26078 NaN NaN
26079 NaN NaN
26080 NaN NaN
26081 NaN NaN
26082 NaN NaN
26083 NaN NaN
26084 NaN NaN
Why answer: I don't answer and I don't want to \
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
5 NaN
6 NaN
7 NaN
8 NaN
9 NaN
10 NaN
11 NaN
12 NaN
13 NaN
14 NaN
15 NaN
16 NaN
17 NaN
18 NaN
19 NaN
20 NaN
21 NaN
22 NaN
23 NaN
24 NaN
25 I don't answer much (or at all), and I don't w...
26 NaN
27 NaN
28 NaN
29 NaN
... ...
26054 NaN
26055 NaN
26056 NaN
26057 NaN
26059 NaN
26060 NaN
26061 NaN
26062 NaN
26063 NaN
26064 NaN
26065 NaN
26066 NaN
26067 NaN
26068 NaN
26069 NaN
26070 NaN
26071 NaN
26072 NaN
26073 NaN
26074 NaN
26075 NaN
26076 NaN
26077 NaN
26078 NaN
26079 NaN
26080 NaN
26081 NaN
26082 NaN
26083 NaN
26084 NaN
Why answer: I don't answer but I want to AgeScale
0 NaN 2.0
1 NaN 1.0
2 NaN 1.0
3 NaN 2.0
4 I don't answer much (or at all), but I want to... 3.0
5 NaN 4.0
6 NaN 0.0
7 I don't answer much (or at all), but I want to... 2.0
8 NaN 3.0
9 NaN 4.0
10 NaN 4.0
11 NaN 2.0
12 NaN 5.0
13 NaN 2.0
14 NaN 1.0
15 NaN 2.0
16 NaN 2.0
17 I don't answer much (or at all), but I want to... 3.0
18 NaN 1.0
19 I don't answer much (or at all), but I want to... 4.0
20 NaN 3.0
21 NaN 2.0
22 I don't answer much (or at all), but I want to... 3.0
23 NaN 2.0
24 I don't answer much (or at all), but I want to... 3.0
25 NaN 4.0
26 NaN 2.0
27 NaN 3.0
28 NaN 2.0
29 NaN 3.0
... ... ...
26054 NaN 2.0
26055 NaN 1.0
26056 NaN 7.0
26057 NaN 3.0
26059 NaN 5.0
26060 NaN 3.0
26061 NaN 3.0
26062 NaN 2.0
26063 NaN 1.0
26064 NaN 3.0
26065 NaN 1.0
26066 NaN 1.0
26067 NaN 2.0
26068 NaN 2.0
26069 NaN 3.0
26070 NaN 2.0
26071 NaN 1.0
26072 NaN 1.0
26073 NaN 2.0
26074 NaN 1.0
26075 NaN 2.0
26076 NaN 1.0
26077 NaN 1.0
26078 NaN 1.0
26079 NaN NaN
26080 NaN 0.0
26081 NaN 1.0
26082 NaN 2.0
26083 NaN 2.0
26084 NaN NaN
[25747 rows x 223 columns]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-91-c8417b663083> in <module>()
----> 1 print(years_df).head()
AttributeError: 'NoneType' object has no attribute 'head'
In [92]:
years_df['Years IT / Programming Experience'].value_counts()
Out[92]:
2 - 5 years 7964
11+ years 5900
6 - 10 years 5722
1 - 2 years 3324
Less than 1 year 1617
Name: Years IT / Programming Experience, dtype: int64
In [93]:
years_df['ExperienceRank'] = years_df['Years IT / Programming Experience'].apply(str).replace("Less than 1 year", "0").apply(str).replace("1 - 2 years", "1").apply(str).replace("2 - 5 years", "2").apply(str).replace("6 - 10 years", "3").apply(str).replace("11+ years", "4").astype(float)
/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
if __name__ == '__main__':
In [94]:
# years_df.head()
In [95]:
years_df['ExperienceRank'].value_counts()
Out[95]:
2.0 7964
4.0 5900
3.0 5722
1.0 3324
0.0 1617
Name: ExperienceRank, dtype: int64
In [96]:
years_df['AgeScale'].value_counts()
Out[96]:
2.0 7365
1.0 6339
3.0 4593
4.0 2357
0.0 2261
5.0 1965
6.0 488
7.0 124
Name: AgeScale, dtype: int64
In [100]:
#years_df['ExperienceRank'] = float(years_df['ExperienceRank'])
# years_df['AgeScale'] = float(years_df['AgeScale'])
# years_df['AgeScale'] = years_df['AgeScale'].apply(int)
#years_df['ExperienceRank'] = parseInt(years_df['ExperienceRank'])
#years_df['ExperienceRank'] = pd.Series(years_df['ExperienceRank'])
#years_df['AgeScale'] = pd.Series(years_df['AgeScale'])
moneyScatter = years_df.plot(kind='scatter', x='ExperienceRank', y='AgeScale', alpha=0.2) # caegorical data dos not display well on scatter plots
#moneyScatter.set_title('Distribution of Money Spent Amongst Respondents to the Survey by Age')
#moneyScatter.set_xlabel('Months Programming')
#moneyScatter.set_ylabel('Hours Spent Learning Each Week')
#plt.savefig('studyingovertime.png')
In [59]:
years_df['ExperienceRank'].describe()
Out[59]:
count 25747
unique 6
top 2
freq 7964
Name: ExperienceRank, dtype: object
In [101]:
years_df[['ExperienceRank','AgeScale']] = years_df[['ExperienceRank','AgeScale']].apply(pd.to_numeric)
# years_df.apply(lambda x: pd.to_numeric(x, errors='ignore'))
/Users/Monica/.virtualenvs/dataanalysis/lib/python3.5/site-packages/pandas/core/frame.py:2378: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
self[k1] = value[k2]
In [66]:
years_df['ExperienceRank'].describe()
Out[66]:
count 25747
unique 6
top 2
freq 7964
Name: ExperienceRank, dtype: object
In [67]:
years_df['ExperienceRank'].head()
Out[67]:
0 2
1 1
2 1
3 3
4 2
Name: ExperienceRank, dtype: object
In [102]:
years_df['AgeScale'].head()
Out[102]:
0 2.0
1 1.0
2 1.0
3 2.0
4 3.0
Name: AgeScale, dtype: float64
In [ ]:
Content source: M0nica/python-foundations-hw
Similar notebooks: