Import data and select a subset into a DataFrame


In [6]:
import pandas as pd

# read csv into dataframe
df = pd.read_csv('Zip_Zri_MultiFamilyResidenceRental.csv')

# filter dataframe on multnoma county
df['RegionName'][df['CountyName']=='Multnomah County']

# get the last years worth of data
df = df[['RegionName','2019-01','2019-02','2019-03','2019-04','2019-05','2019-06','2019-07','2019-08','2019-09','2019-10','2019-11','2019-12','2020-01']][df['CountyName']=='Multnomah County']

df


Out[6]:
RegionName 2019-01 2019-02 2019-03 2019-04 2019-05 2019-06 2019-07 2019-08 2019-09 2019-10 2019-11 2019-12 2020-01
117 97229 1443.0 1444.0 1438.0 1433.0 1437.0 1435.0 1456.0 1474.0 1491.0 1512.0 1514.0 1517.0 1528.0
396 97206 1403.0 1401.0 1398.0 1393.0 1392.0 1394.0 NaN 1484.0 1497.0 NaN 1574.0 1594.0 1602.0
570 97202 1505.0 1507.0 1496.0 1481.0 1477.0 1482.0 NaN 1598.0 1620.0 1648.0 1674.0 1685.0 1661.0
740 97030 1397.0 1397.0 1387.0 1376.0 1367.0 1360.0 1378.0 1386.0 1398.0 NaN 1493.0 1514.0 1544.0
809 97217 NaN NaN NaN NaN NaN NaN 1575.0 1600.0 1629.0 NaN 1737.0 1719.0 1682.0
956 97209 1727.0 1727.0 1733.0 1731.0 1732.0 1734.0 1742.0 1735.0 1746.0 1772.0 1783.0 1733.0 1750.0
987 97214 1580.0 1570.0 1556.0 1547.0 1548.0 1560.0 NaN 1641.0 1649.0 1633.0 1632.0 1631.0 1636.0
1388 97201 1577.0 1561.0 1550.0 1538.0 1531.0 1529.0 1533.0 1540.0 1547.0 1573.0 1560.0 1549.0 1542.0
1524 97239 1637.0 1638.0 1625.0 1610.0 1606.0 1603.0 1620.0 1630.0 1632.0 1647.0 1668.0 1670.0 1657.0
1648 97232 1641.0 1639.0 1638.0 1629.0 1620.0 1615.0 1645.0 1663.0 1666.0 1664.0 1656.0 1657.0 1678.0

Transpose the dataframe


In [9]:
# transpose 
df = df.T
df


Out[9]:
117 396 570 740 809 956 987 1388 1524 1648
RegionName 97229.0 97206.0 97202.0 97030.0 97217.0 97209.0 97214.0 97201.0 97239.0 97232.0
2019-01 1443.0 1403.0 1505.0 1397.0 NaN 1727.0 1580.0 1577.0 1637.0 1641.0
2019-02 1444.0 1401.0 1507.0 1397.0 NaN 1727.0 1570.0 1561.0 1638.0 1639.0
2019-03 1438.0 1398.0 1496.0 1387.0 NaN 1733.0 1556.0 1550.0 1625.0 1638.0
2019-04 1433.0 1393.0 1481.0 1376.0 NaN 1731.0 1547.0 1538.0 1610.0 1629.0
2019-05 1437.0 1392.0 1477.0 1367.0 NaN 1732.0 1548.0 1531.0 1606.0 1620.0
2019-06 1435.0 1394.0 1482.0 1360.0 NaN 1734.0 1560.0 1529.0 1603.0 1615.0
2019-07 1456.0 NaN NaN 1378.0 1575.0 1742.0 NaN 1533.0 1620.0 1645.0
2019-08 1474.0 1484.0 1598.0 1386.0 1600.0 1735.0 1641.0 1540.0 1630.0 1663.0
2019-09 1491.0 1497.0 1620.0 1398.0 1629.0 1746.0 1649.0 1547.0 1632.0 1666.0
2019-10 1512.0 NaN 1648.0 NaN NaN 1772.0 1633.0 1573.0 1647.0 1664.0
2019-11 1514.0 1574.0 1674.0 1493.0 1737.0 1783.0 1632.0 1560.0 1668.0 1656.0
2019-12 1517.0 1594.0 1685.0 1514.0 1719.0 1733.0 1631.0 1549.0 1670.0 1657.0
2020-01 1528.0 1602.0 1661.0 1544.0 1682.0 1750.0 1636.0 1542.0 1657.0 1678.0

Fix the incorrect column names issue


In [11]:
# create a new header by selecting the first row
new_header = df.iloc[0]

# set the dataframe to everything but the top column
df = df[1:]

# set the dataframe columns equal to the first row selected earlier
df.columns = new_header

df


Out[11]:
2019-01 1443.0 1403.0 1505.0 1397.0 NaN 1727.0 1580.0 1577.0 1637.0 1641.0
2019-02 1444.0 1401.0 1507.0 1397.0 NaN 1727.0 1570.0 1561.0 1638.0 1639.0
2019-03 1438.0 1398.0 1496.0 1387.0 NaN 1733.0 1556.0 1550.0 1625.0 1638.0
2019-04 1433.0 1393.0 1481.0 1376.0 NaN 1731.0 1547.0 1538.0 1610.0 1629.0
2019-05 1437.0 1392.0 1477.0 1367.0 NaN 1732.0 1548.0 1531.0 1606.0 1620.0
2019-06 1435.0 1394.0 1482.0 1360.0 NaN 1734.0 1560.0 1529.0 1603.0 1615.0
2019-07 1456.0 NaN NaN 1378.0 1575.0 1742.0 NaN 1533.0 1620.0 1645.0
2019-08 1474.0 1484.0 1598.0 1386.0 1600.0 1735.0 1641.0 1540.0 1630.0 1663.0
2019-09 1491.0 1497.0 1620.0 1398.0 1629.0 1746.0 1649.0 1547.0 1632.0 1666.0
2019-10 1512.0 NaN 1648.0 NaN NaN 1772.0 1633.0 1573.0 1647.0 1664.0
2019-11 1514.0 1574.0 1674.0 1493.0 1737.0 1783.0 1632.0 1560.0 1668.0 1656.0
2019-12 1517.0 1594.0 1685.0 1514.0 1719.0 1733.0 1631.0 1549.0 1670.0 1657.0
2020-01 1528.0 1602.0 1661.0 1544.0 1682.0 1750.0 1636.0 1542.0 1657.0 1678.0

In [ ]: