Import data and select a subset into a DataFrame
In [6]:
import pandas as pd
# read csv into dataframe
df = pd.read_csv('Zip_Zri_MultiFamilyResidenceRental.csv')
# filter dataframe on multnoma county
df['RegionName'][df['CountyName']=='Multnomah County']
# get the last years worth of data
df = df[['RegionName','2019-01','2019-02','2019-03','2019-04','2019-05','2019-06','2019-07','2019-08','2019-09','2019-10','2019-11','2019-12','2020-01']][df['CountyName']=='Multnomah County']
df
Out[6]:
RegionName
2019-01
2019-02
2019-03
2019-04
2019-05
2019-06
2019-07
2019-08
2019-09
2019-10
2019-11
2019-12
2020-01
117
97229
1443.0
1444.0
1438.0
1433.0
1437.0
1435.0
1456.0
1474.0
1491.0
1512.0
1514.0
1517.0
1528.0
396
97206
1403.0
1401.0
1398.0
1393.0
1392.0
1394.0
NaN
1484.0
1497.0
NaN
1574.0
1594.0
1602.0
570
97202
1505.0
1507.0
1496.0
1481.0
1477.0
1482.0
NaN
1598.0
1620.0
1648.0
1674.0
1685.0
1661.0
740
97030
1397.0
1397.0
1387.0
1376.0
1367.0
1360.0
1378.0
1386.0
1398.0
NaN
1493.0
1514.0
1544.0
809
97217
NaN
NaN
NaN
NaN
NaN
NaN
1575.0
1600.0
1629.0
NaN
1737.0
1719.0
1682.0
956
97209
1727.0
1727.0
1733.0
1731.0
1732.0
1734.0
1742.0
1735.0
1746.0
1772.0
1783.0
1733.0
1750.0
987
97214
1580.0
1570.0
1556.0
1547.0
1548.0
1560.0
NaN
1641.0
1649.0
1633.0
1632.0
1631.0
1636.0
1388
97201
1577.0
1561.0
1550.0
1538.0
1531.0
1529.0
1533.0
1540.0
1547.0
1573.0
1560.0
1549.0
1542.0
1524
97239
1637.0
1638.0
1625.0
1610.0
1606.0
1603.0
1620.0
1630.0
1632.0
1647.0
1668.0
1670.0
1657.0
1648
97232
1641.0
1639.0
1638.0
1629.0
1620.0
1615.0
1645.0
1663.0
1666.0
1664.0
1656.0
1657.0
1678.0
Transpose the dataframe
In [9]:
# transpose
df = df.T
df
Out[9]:
117
396
570
740
809
956
987
1388
1524
1648
RegionName
97229.0
97206.0
97202.0
97030.0
97217.0
97209.0
97214.0
97201.0
97239.0
97232.0
2019-01
1443.0
1403.0
1505.0
1397.0
NaN
1727.0
1580.0
1577.0
1637.0
1641.0
2019-02
1444.0
1401.0
1507.0
1397.0
NaN
1727.0
1570.0
1561.0
1638.0
1639.0
2019-03
1438.0
1398.0
1496.0
1387.0
NaN
1733.0
1556.0
1550.0
1625.0
1638.0
2019-04
1433.0
1393.0
1481.0
1376.0
NaN
1731.0
1547.0
1538.0
1610.0
1629.0
2019-05
1437.0
1392.0
1477.0
1367.0
NaN
1732.0
1548.0
1531.0
1606.0
1620.0
2019-06
1435.0
1394.0
1482.0
1360.0
NaN
1734.0
1560.0
1529.0
1603.0
1615.0
2019-07
1456.0
NaN
NaN
1378.0
1575.0
1742.0
NaN
1533.0
1620.0
1645.0
2019-08
1474.0
1484.0
1598.0
1386.0
1600.0
1735.0
1641.0
1540.0
1630.0
1663.0
2019-09
1491.0
1497.0
1620.0
1398.0
1629.0
1746.0
1649.0
1547.0
1632.0
1666.0
2019-10
1512.0
NaN
1648.0
NaN
NaN
1772.0
1633.0
1573.0
1647.0
1664.0
2019-11
1514.0
1574.0
1674.0
1493.0
1737.0
1783.0
1632.0
1560.0
1668.0
1656.0
2019-12
1517.0
1594.0
1685.0
1514.0
1719.0
1733.0
1631.0
1549.0
1670.0
1657.0
2020-01
1528.0
1602.0
1661.0
1544.0
1682.0
1750.0
1636.0
1542.0
1657.0
1678.0
Fix the incorrect column names issue
In [11]:
# create a new header by selecting the first row
new_header = df.iloc[0]
# set the dataframe to everything but the top column
df = df[1:]
# set the dataframe columns equal to the first row selected earlier
df.columns = new_header
df
Out[11]:
2019-01
1443.0
1403.0
1505.0
1397.0
NaN
1727.0
1580.0
1577.0
1637.0
1641.0
2019-02
1444.0
1401.0
1507.0
1397.0
NaN
1727.0
1570.0
1561.0
1638.0
1639.0
2019-03
1438.0
1398.0
1496.0
1387.0
NaN
1733.0
1556.0
1550.0
1625.0
1638.0
2019-04
1433.0
1393.0
1481.0
1376.0
NaN
1731.0
1547.0
1538.0
1610.0
1629.0
2019-05
1437.0
1392.0
1477.0
1367.0
NaN
1732.0
1548.0
1531.0
1606.0
1620.0
2019-06
1435.0
1394.0
1482.0
1360.0
NaN
1734.0
1560.0
1529.0
1603.0
1615.0
2019-07
1456.0
NaN
NaN
1378.0
1575.0
1742.0
NaN
1533.0
1620.0
1645.0
2019-08
1474.0
1484.0
1598.0
1386.0
1600.0
1735.0
1641.0
1540.0
1630.0
1663.0
2019-09
1491.0
1497.0
1620.0
1398.0
1629.0
1746.0
1649.0
1547.0
1632.0
1666.0
2019-10
1512.0
NaN
1648.0
NaN
NaN
1772.0
1633.0
1573.0
1647.0
1664.0
2019-11
1514.0
1574.0
1674.0
1493.0
1737.0
1783.0
1632.0
1560.0
1668.0
1656.0
2019-12
1517.0
1594.0
1685.0
1514.0
1719.0
1733.0
1631.0
1549.0
1670.0
1657.0
2020-01
1528.0
1602.0
1661.0
1544.0
1682.0
1750.0
1636.0
1542.0
1657.0
1678.0
In [ ]:
Content source: PDXKor/PDXKor.github.io
Similar notebooks: