$\large{\text{LESSON 1 - Data Extraction Fundamentals}}$

simple2.py - CSV QUIZ


In [10]:
import os

DATADIR = ""
DATAFILE = "beatles-diskography.csv"


def parse_file(datafile):
    data = []
    with open(datafile, "r") as f:
        header = f.readline().split(",")
        counter = 0
        for line in f:
            if counter == 10:
                break

            fields = line.split(",")
            entry = {}

            for key, value in enumerate(fields):
                entry[header[key].strip()] = value.strip()

            data.append(entry)
            counter += 1
    return data


def test():
    # a simple test of your implemetation
    datafile = os.path.join(DATADIR, DATAFILE)
    d = parse_file(datafile)
    firstline = {'Title': 'Please Please Me', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '22 March 1963', 'US Chart Position': '-', 'RIAA Certification': 'Platinum', 'BPI Certification': 'Gold'}
    tenthline = {'Title': '', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '10 July 1964', 'US Chart Position': '-', 'RIAA Certification': '', 'BPI Certification': 'Gold'}

    assert d[0] == firstline
    assert d[9] == tenthline
    print "Well done!"

test()


Well done!

Pandas test visualization


In [3]:
import pandas as pd
data = pd.read_excel('2013_ERCOT_Hourly_Load_Data.xls')

In [4]:
data


Out[4]:
Hour_End COAST EAST FAR_WEST NORTH NORTH_C SOUTHERN SOUTH_C WEST ERCOT
0 2013-01-01 01:00:00 7606.263544 1073.892438 1411.750567 784.978166 10369.094390 2206.675077 4368.490945 882.931901 28704.077028
1 2013-01-01 02:00:00 7388.082714 1035.021938 1403.472287 776.307387 10152.358518 2159.733208 4233.587967 872.404750 28020.968769
2 2013-01-01 03:00:00 7178.867878 1036.088697 1395.053150 768.125748 9988.051418 2065.114706 4082.862860 868.853938 27383.018395
3 2013-01-01 04:00:00 7038.822581 1032.648841 1395.508820 770.937969 9946.658655 1990.903699 4010.489608 865.701201 27051.671374
4 2013-01-01 05:00:00 6990.857940 1042.823044 1401.216842 779.089313 10096.664190 1954.807585 4038.655997 879.924249 27184.039160
5 2013-01-01 06:00:00 7049.790189 1069.143383 1419.459423 799.665525 10470.765865 1961.811658 4174.012741 911.110806 27855.759590
6 2013-01-01 07:00:00 7176.995344 1109.020961 1444.363323 815.012163 10890.047390 1994.286651 4360.952348 950.713346 28741.391526
7 2013-01-01 08:00:00 7222.680062 1148.715731 1463.184105 834.538850 11189.399850 2041.821641 4522.107314 989.528170 29411.975723
8 2013-01-01 09:00:00 7313.827285 1196.908758 1474.805411 861.583704 11626.469691 2131.386752 4688.309905 1021.078459 30314.369965
9 2013-01-01 10:00:00 7676.263088 1235.782596 1500.837360 891.112637 12309.874203 2249.794897 4992.413422 1047.999363 31904.077566
10 2013-01-01 11:00:00 8060.585862 1406.429542 1507.771083 914.011797 12934.210219 2341.648420 5270.261501 1048.641390 33483.559814
11 2013-01-01 12:00:00 8392.872943 1342.000994 1489.980768 925.364606 13200.599637 2390.554533 5427.138224 1040.214296 34208.726001
12 2013-01-01 13:00:00 8508.460276 1328.377275 1467.224262 916.297551 13061.037603 2419.443410 5449.833387 1023.161083 34173.834847
13 2013-01-01 14:00:00 8497.619547 1319.834144 1439.254169 902.870130 12849.925561 2392.047501 5401.314027 996.804163 33799.669242
14 2013-01-01 15:00:00 8466.284440 1296.620657 1419.779150 891.508292 12723.703402 2359.218652 5287.699870 974.873096 33419.687559
15 2013-01-01 16:00:00 8430.930261 1298.205636 1410.692733 891.197389 12609.052314 2359.257971 5225.694637 964.864918 33189.895859
16 2013-01-01 17:00:00 8559.857376 1320.744020 1424.581575 907.790490 13027.142913 2373.885422 5319.840547 987.853516 33921.695859
17 2013-01-01 18:00:00 9094.197766 1420.351725 1467.447175 952.911360 14089.022371 2494.392376 5756.908907 1051.774633 36327.006313
18 2013-01-01 19:00:00 9538.663747 1481.982153 1543.823350 981.606015 14736.170388 2732.596561 6293.955790 1128.531905 38437.329909
19 2013-01-01 20:00:00 9561.898402 1482.735078 1567.126217 979.691301 14707.857423 2756.554973 6349.715383 1141.573607 38547.152384
20 2013-01-01 21:00:00 9511.658059 1468.031666 1574.662139 972.511889 14595.258933 2740.982401 6323.193811 1143.911935 38330.210833
21 2013-01-01 22:00:00 9330.179731 1383.801664 1564.786471 949.749198 14242.974339 2668.150569 6196.420937 1126.713944 37462.776853
22 2013-01-01 23:00:00 8959.046719 1350.506171 1535.767726 910.123028 13607.988176 2550.204287 5920.110001 1092.905426 35926.651534
23 2013-01-02 00:00:00 8504.496355 1320.851929 1501.874384 875.170231 12942.577593 2400.974199 5615.988877 1056.346427 34218.279995
24 2013-01-02 01:00:00 8100.214418 1272.276148 1519.359103 848.245625 12448.299575 2363.740902 5430.177584 1031.985455 33014.298810
25 2013-01-02 02:00:00 7936.352567 1254.130609 1515.812937 846.223761 12282.546228 2278.975641 5377.343847 1032.567915 32523.953505
26 2013-01-02 03:00:00 7903.615992 1253.810187 1520.716566 856.478341 12295.875695 2242.241119 5398.590964 1041.710254 32513.039118
27 2013-01-02 04:00:00 7979.405935 1274.255582 1535.286403 868.016166 12524.200370 2236.229009 5514.649191 1062.698731 32994.741387
28 2013-01-02 05:00:00 8235.651256 1313.231147 1550.100805 888.626638 13057.202718 2320.550369 5759.632500 1097.139085 34222.134518
29 2013-01-02 06:00:00 8842.350775 1387.986754 1598.118699 936.446825 14042.490507 2504.837316 6269.860822 1171.677476 36753.769174
... ... ... ... ... ... ... ... ... ... ...
7265 2013-10-30 19:00:00 12356.838365 1478.279866 1652.349934 839.386660 12242.573325 3804.591782 7363.100253 1084.058375 40821.178560
7266 2013-10-30 20:00:00 12643.616329 1429.000430 1677.261826 841.102092 12440.944420 3881.526608 7514.330514 1105.607687 41533.389906
7267 2013-10-30 21:00:00 12549.145776 1427.475070 1676.887773 827.986154 12291.174225 3819.569852 7347.192413 1089.507634 41028.938897
7268 2013-10-30 22:00:00 12154.444684 1362.162789 1644.224653 797.736560 11760.213564 3654.417471 6947.615167 1050.342332 39371.157220
7269 2013-10-30 23:00:00 11465.386657 1233.037790 1589.421337 749.766935 10901.220933 3409.625488 6407.567607 990.082261 36746.109008
7270 2013-10-31 00:00:00 10705.746521 1141.692619 1520.178114 695.352157 9911.833589 3118.286867 5786.519177 913.314422 33792.923466
7271 2013-10-31 01:00:00 10219.236203 1057.958649 1507.881571 661.128631 9084.938508 2869.668717 5312.791741 856.579056 31570.183076
7272 2013-10-31 02:00:00 9937.995494 1018.220843 1482.156527 640.263917 8665.608446 2729.253998 5007.685070 823.852219 30305.036514
7273 2013-10-31 03:00:00 9738.822632 980.756503 1468.652671 628.974525 8442.565636 2641.809287 4817.698468 803.896617 29523.176339
7274 2013-10-31 04:00:00 9627.243910 1000.919828 1465.213477 624.467574 8334.029417 2578.682939 4690.863767 795.082665 29116.503577
7275 2013-10-31 05:00:00 9794.616211 973.997789 1469.850959 622.181556 8534.107592 2565.021862 4673.425190 803.045689 29436.246848
7276 2013-10-31 06:00:00 10361.874780 1072.067675 1495.235002 653.953744 9279.080178 2676.007993 4945.967164 842.896782 31327.083318
7277 2013-10-31 07:00:00 11195.488886 1238.882811 1572.199575 718.355849 10700.464265 2981.946003 5576.320053 928.252921 34911.910363
7278 2013-10-31 08:00:00 11374.885876 1298.344519 1628.794748 769.617594 11533.729969 3094.886586 5974.388546 976.355904 36651.003742
7279 2013-10-31 09:00:00 11026.868773 1311.085638 1614.581218 763.396511 11397.863169 3021.311754 5926.221062 960.268030 36021.596155
7280 2013-10-31 10:00:00 10863.148881 1304.422103 1612.336341 781.991829 11620.790910 3084.574384 5979.025613 978.380289 36224.670350
7281 2013-10-31 11:00:00 10882.551137 1330.275048 1605.566259 768.346901 11888.864632 3209.900030 6235.120343 995.038575 36915.662925
7282 2013-10-31 12:00:00 10868.903881 1378.111740 1610.992290 777.690361 11938.522920 3336.723809 6491.809708 1003.729439 37406.484148
7283 2013-10-31 13:00:00 10743.727260 1407.438216 1605.714464 763.589926 11935.824605 3439.932341 6624.019123 1005.339983 37525.585918
7284 2013-10-31 14:00:00 10683.681008 1429.054662 1619.842602 777.051834 11945.183751 3580.448974 6748.436811 1015.816451 37799.516093
7285 2013-10-31 15:00:00 10640.661747 1447.143464 1624.280511 756.682005 11956.949892 3735.759017 6824.997019 1023.909839 38010.383494
7286 2013-10-31 16:00:00 10827.919174 1451.253257 1630.981695 776.037474 11864.402366 3856.256675 6885.984232 1033.101791 38325.936664
7287 2013-10-31 17:00:00 11060.636443 1440.391423 1640.213061 768.642479 11856.466411 3909.452995 6928.671390 1041.997671 38646.471873
7288 2013-10-31 18:00:00 10901.529049 1406.108571 1621.070318 749.244482 11606.224789 3795.544020 6785.377200 1024.393509 37889.491938
7289 2013-10-31 19:00:00 10537.359103 1344.198943 1586.725584 744.978254 11231.074062 3575.567072 6331.807447 991.477973 36343.188438
7290 2013-10-31 20:00:00 10457.438471 1345.009169 1589.799891 753.475762 11149.955067 3476.815039 6051.873665 985.198781 35809.565845
7291 2013-10-31 21:00:00 10197.491338 1320.390073 1582.779302 731.693542 10944.940047 3370.840217 5807.145386 969.311320 34924.591225
7292 2013-10-31 22:00:00 9952.318836 1265.091914 1579.842929 725.854341 10668.846644 3278.694596 5589.378691 951.380232 34011.408183
7293 2013-10-31 23:00:00 9294.769146 1172.797264 1556.845958 690.416508 9890.064556 3066.893444 5111.455446 899.185472 31682.427794
7294 2013-11-01 00:00:00 8517.488200 1070.960145 1506.419580 649.738208 8946.722298 2768.016445 4565.169026 833.298991 28857.812893

7295 rows × 10 columns

readxls.py - EXCEL QUIZ


In [37]:
import xlrd

datafile = "2013_ERCOT_Hourly_Load_Data.xls"


def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    data = [[sheet.cell_value(r, col)
                for col in range(sheet.ncols)]
                    for r in range(sheet.nrows)]
    return data
    
data = parse_file(datafile)

In [112]:
coast = []
for i in range(len(data[:])-1):
    coast.append(data[i+1][1])
    
average= (sum(coast))/(len(coast))

In [111]:
for i, value in enumerate(coast):
    if value == max(coast):
        maxvalue = value
        maxtime = xlrd.xldate_as_tuple(data[i+1][0], 0)
    if value == min(coast):
        minvalue = value
        mintime = xlrd.xldate_as_tuple(data[i+1][0], 0)
        
print maxvalue, maxtime
print minvalue, mintime


18779.02551 (2013, 8, 13, 17, 0, 0)
6602.113899 (2013, 2, 3, 4, 0, 0)

musicbrainz.py - JSON QUIZ


In [8]:
results = query_by_name(ARTIST_URL, query_type["simple"], "First Aid Kit")
    # pretty_print(results)
    count_match = 0

    for artist in results["artists"]:
        # pretty_print(artist)

        if artist["name"] == "First Aid Kit":
            count_match += 1

    print "There are {0} bands named First Aid Kit".format(count_match)

    # Question 2: Begin-Area name for Queen?
    results = query_by_name(ARTIST_URL, query_type["simple"], "Queen")
    print ("The Begin-Area name is ", results["artists"][0]["begin-area"]["name"])

    # Question 3: Spanish alias for The Beatles?
    results = query_by_name(ARTIST_URL, query_type["simple"], "The Beatles")
    # pretty_print(results)

    for alias in results["artists"][0]["aliases"]:
        if alias["locale"] == "es":
            print "The Spanish alias for The Beatles is " + alias["name"]

    # Question 4: Nirvana disambiguation?
    results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
    # pretty_print(results)

    print "The disambiguation for Nirvana is " + results["artists"][0]["disambiguation"]

    # Question 5: Where was One Direction formed?
    results = query_by_name(ARTIST_URL, query_type["simple"], "One Direction")
    # pretty_print(results)

    print "One Direction was formed in " + results["artists"][0]["life-span"]["begin"]


requesting http://musicbrainz.org/ws/2/artist/?query=artist%3AFirst+Aid+Kit&fmt=json
There are 2 bands named First Aid Kit
requesting http://musicbrainz.org/ws/2/artist/?query=artist%3AQueen&fmt=json
('The Begin-Area name is ', u'London')
requesting http://musicbrainz.org/ws/2/artist/?query=artist%3AThe+Beatles&fmt=json
The Spanish alias for The Beatles is Los Beatles
requesting http://musicbrainz.org/ws/2/artist/?query=artist%3ANirvana&fmt=json
The disambiguation for Nirvana is 90s US grunge band
requesting http://musicbrainz.org/ws/2/artist/?query=artist%3AOne+Direction&fmt=json
One Direction was formed in 2010-07

In [ ]: