In [2]:
import pandas as pd
%matplotlib inline
In [4]:
# Read the job data CSV into a DataFrame and preview its first five rows.
job_data = pd.read_csv(filepath_or_buffer="../web-scraping/jobs-data.csv")
job_data.head(5)
Out[4]:
Now that the data is loaded, we can begin the analysis.
In [5]:
# Show the dtype pandas assigned to each column of job_data.
job_data.dtypes
Out[5]:
In [ ]:
# (number of rows, number of columns) of job_data.
job_data.shape
In [ ]:
# Frequency of each distinct job title, most common first (value_counts sorts descending by default).
job_data['job_title'].value_counts()
In [ ]:
# Frequency of each distinct company name, most common first (value_counts sorts descending by default).
job_data['company_name'].value_counts()
In [ ]:
# Frequency of each distinct job location, most common first (value_counts sorts descending by default).
job_data['job_location'].value_counts()
In [ ]:
# Derive a 'state' column from job_location: capture the two uppercase letters that
# follow ", " and end at a word boundary. Rows with no such match get NaN.
# The pattern is a raw string so that the regex escapes are not interpreted as
# (invalid) Python string escapes, and it requires a complete two-letter uppercase
# token so that a longer word after the comma is not truncated to its first two characters.
# NOTE(review): assumes state abbreviations are written in uppercase - confirm against the data.
job_data['state'] = job_data['job_location'].str.extract(r', ([A-Z]{2})\b', expand=False)
job_data.head()
In [ ]:
# Horizontal bar chart of the number of rows per extracted state (rows with a
# missing state are not counted by value_counts).
# The x-axis is fixed to 0-450, so any bar longer than 450 would be clipped.
ax = job_data['state'].value_counts(ascending=True).plot(kind="barh", figsize=(10,10), xlim=(0,450))
# add counts as annotations
# http://stackoverflow.com/questions/23591254/python-pandas-matplotlib-annotating-labels-above-bar-chart-columns
for p in ax.patches:
    # For a horizontal bar the patch width is the count; place the label at the
    # right-hand end of the bar.
    ax.annotate("%d" % p.get_width(), (p.get_x() + p.get_width(), p.get_y()), xytext=(0, 0), textcoords='offset points')
In [ ]:
# Rows whose job_location contains the text "Pittsburgh".
# na=False makes rows with a missing location count as non-matches; without it the
# mask contains NaN and cannot be used for boolean indexing.
# regex=False matches the text literally rather than as a regular expression.
job_data[job_data['job_location'].str.contains("Pittsburgh", na=False, regex=False)]
In [7]:
# Read the company data CSV into a DataFrame and preview its first five rows.
company_data = pd.read_csv(filepath_or_buffer="company-data.csv")
company_data.head(5)
Out[7]:
In [8]:
# Show the dtype pandas assigned to each column of company_data.
company_data.dtypes
Out[8]:
In [9]:
# (number of rows, number of columns) of company_data.
company_data.shape
Out[9]:
In [10]:
# Summary statistics for company_data; the quartiles listed here are
# the ones describe() reports by default.
company_data.describe(percentiles=[0.25, 0.5, 0.75])
Out[10]:
In [11]:
# Histogram of the overall_rating column (10 bins, the pandas default).
company_data['overall_rating'].hist(bins=10)
Out[11]:
In [12]:
# Histogram of the culture_rating column (10 bins, the pandas default).
company_data['culture_rating'].hist(bins=10)
Out[12]:
In [13]:
# Histogram of the compensation_benefits_rating column (10 bins, the pandas default).
company_data['compensation_benefits_rating'].hist(bins=10)
Out[13]:
In [14]:
# Histogram of the management_rating column (10 bins, the pandas default).
company_data['management_rating'].hist(bins=10)
Out[14]:
In [6]:
# Histogram of the js_advancement_rating column (10 bins, the pandas default).
company_data['js_advancement_rating'].hist(bins=10)
In [ ]: