In [16]:
from ibmdbpy import IdaDataBase, IdaDataFrame

# @hidden_cell
# This connection object is used to access your data and contains your credentials.
# You might want to remove those credentials before you share your notebook.
idadb_5d05821b4dd8478b9b0a93ae9703695b = IdaDataBase(dsn='DASHDB;Database=BLUDB;Hostname=awh-yp-small02.services.dal.bluemix.net;Port=50000;PROTOCOL=TCPIP;UID=dash111495;PWD=#T9Yts@ctXM1')

df = IdaDataFrame(idadb_5d05821b4dd8478b9b0a93ae9703695b, 'DASH111495.IMAGE_DATA').as_dataframe()
df.head()

# You can close the database connection with the following code. Please keep the comment line with the @hidden_cell tag,
# because the close function displays parts of the credentials.
# @hidden_cell
# idadb_5d05821b4dd8478b9b0a93ae9703695b.close()
# To learn more about the ibmdby package, please read the documentation: http://pythonhosted.org/ibmdbpy/


Out[16]:
red row 0 col 0 green row 0 col 0 blue row 0 col 0 red row 0 col 1 green row 0 col 1 blue row 0 col 1 red row 0 col 2 green row 0 col 2 blue row 0 col 2 red row 0 col 3 ... red row 9 col 7 green row 9 col 7 blue row 9 col 7 red row 9 col 8 green row 9 col 8 blue row 9 col 8 red row 9 col 9 green row 9 col 9 blue row 9 col 9 car
0 240 239 233 177 181 181 169 174 175 187 ... 23 20 27 24 21 28 27 24 31 true
1 120 147 131 110 126 62 122 144 79 117 ... 176 169 163 181 174 168 172 165 159 true
2 164 174 179 183 193 198 177 185 188 76 ... 50 49 44 58 56 52 55 54 50 true
3 211 215 221 216 221 227 223 228 234 226 ... 115 82 43 167 148 128 141 141 113 true
4 185 186 185 187 190 192 185 190 197 188 ... 15 15 15 17 17 16 11 12 11 true

5 rows × 301 columns


In [9]:
df.shape


Out[9]:
(33144, 301)

In [44]:
df_cars = df.loc[df['car'] == 'true']
df_cars.head()


Out[44]:
red row 0 col 0 green row 0 col 0 blue row 0 col 0 red row 0 col 1 green row 0 col 1 blue row 0 col 1 red row 0 col 2 green row 0 col 2 blue row 0 col 2 red row 0 col 3 ... red row 9 col 7 green row 9 col 7 blue row 9 col 7 red row 9 col 8 green row 9 col 8 blue row 9 col 8 red row 9 col 9 green row 9 col 9 blue row 9 col 9 car
0 240 239 233 177 181 181 169 174 175 187 ... 23 20 27 24 21 28 27 24 31 true
1 120 147 131 110 126 62 122 144 79 117 ... 176 169 163 181 174 168 172 165 159 true
2 164 174 179 183 193 198 177 185 188 76 ... 50 49 44 58 56 52 55 54 50 true
3 211 215 221 216 221 227 223 228 234 226 ... 115 82 43 167 148 128 141 141 113 true
4 185 186 185 187 190 192 185 190 197 188 ... 15 15 15 17 17 16 11 12 11 true

5 rows × 301 columns


In [43]:
df_noncars = df.loc[df['car'] == 'false']
df_noncars.head()


Out[43]:
red row 0 col 0 green row 0 col 0 blue row 0 col 0 red row 0 col 1 green row 0 col 1 blue row 0 col 1 red row 0 col 2 green row 0 col 2 blue row 0 col 2 red row 0 col 3 ... red row 9 col 7 green row 9 col 7 blue row 9 col 7 red row 9 col 8 green row 9 col 8 blue row 9 col 8 red row 9 col 9 green row 9 col 9 blue row 9 col 9 car
109 54 39 39 72 56 57 85 69 70 99 ... 75 60 57 72 57 58 65 53 54 false
110 85 85 84 79 76 74 135 127 124 115 ... 99 87 83 84 89 96 208 203 204 false
111 56 55 46 61 57 48 64 60 51 64 ... 32 43 44 42 56 60 36 50 56 false
173 142 149 149 198 200 200 114 104 101 91 ... 101 93 79 147 158 160 173 190 200 false
174 2 2 0 17 17 18 44 41 39 34 ... 122 123 128 171 158 135 163 151 141 false

5 rows × 301 columns


In [24]:
df_cars.describe()


Out[24]:
red row 0 col 0 green row 0 col 0 blue row 0 col 0 red row 0 col 1 green row 0 col 1 blue row 0 col 1 red row 0 col 2 green row 0 col 2 blue row 0 col 2 red row 0 col 3 ... blue row 9 col 6 red row 9 col 7 green row 9 col 7 blue row 9 col 7 red row 9 col 8 green row 9 col 8 blue row 9 col 8 red row 9 col 9 green row 9 col 9 blue row 9 col 9
count 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 ... 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000 8144.000000
mean 140.625123 144.181729 143.964514 143.477161 146.830673 146.330796 145.524926 148.792731 148.290398 146.841724 ... 110.187132 122.154961 118.854003 114.703463 126.783153 123.489686 119.279961 131.182466 127.997667 123.414661
std 74.620705 74.784886 80.923406 73.353357 73.518582 79.865519 72.646668 72.710732 78.998611 72.030968 ... 63.159612 63.396712 62.094129 62.562096 62.178453 60.782841 61.443686 61.734873 60.342091 61.339770
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 80.000000 83.000000 71.000000 84.750000 87.000000 75.000000 87.000000 90.000000 78.000000 90.000000 ... 61.000000 74.000000 72.000000 67.000000 80.000000 79.000000 73.000000 86.000000 85.000000 79.000000
50% 140.000000 146.000000 151.000000 145.000000 150.000000 155.000000 148.000000 153.000000 157.000000 150.000000 ... 104.000000 118.000000 115.000000 109.000000 124.000000 119.000000 114.000000 128.000000 124.000000 119.000000
75% 205.000000 209.000000 219.000000 206.000000 210.000000 220.000000 208.000000 212.000000 221.250000 209.000000 ... 151.000000 166.000000 160.000000 155.250000 170.000000 164.000000 159.000000 173.000000 167.000000 162.000000
max 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 ... 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000

8 rows × 300 columns


In [25]:
df_noncars.describe()


Out[25]:
red row 0 col 0 green row 0 col 0 blue row 0 col 0 red row 0 col 1 green row 0 col 1 blue row 0 col 1 red row 0 col 2 green row 0 col 2 blue row 0 col 2 red row 0 col 3 ... blue row 9 col 6 red row 9 col 7 green row 9 col 7 blue row 9 col 7 red row 9 col 8 green row 9 col 8 blue row 9 col 8 red row 9 col 9 green row 9 col 9 blue row 9 col 9
count 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 ... 25000.00000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.000000 25000.00000 25000.000000 25000.000000
mean 119.362720 114.621640 105.836880 121.931000 116.547280 107.337000 123.955880 118.040080 108.682640 125.243160 ... 116.80360 132.899440 124.682640 115.165480 130.539360 122.949120 113.516040 127.54900 120.782920 111.790120
std 64.019557 63.558962 65.149827 62.495292 62.140412 63.681475 61.261071 60.863272 62.306384 60.144759 ... 58.26897 58.758965 57.096383 59.011399 59.848577 58.253937 60.209861 61.37501 59.851473 61.802969
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000
25% 69.000000 63.000000 52.000000 73.000000 67.000000 56.000000 76.000000 70.000000 59.000000 79.000000 ... 72.00000 89.000000 82.000000 69.000000 86.000000 79.000000 66.000000 81.00000 76.000000 63.000000
50% 117.000000 111.000000 99.000000 120.000000 114.000000 101.000000 123.000000 116.000000 103.000000 125.000000 ... 113.00000 134.000000 124.000000 110.500000 130.000000 122.000000 109.000000 127.00000 120.000000 106.000000
75% 166.000000 161.000000 152.000000 168.000000 161.000000 153.000000 169.000000 162.000000 153.000000 170.000000 ... 157.00000 176.000000 165.000000 157.000000 175.000000 164.000000 156.000000 172.00000 163.000000 155.000000
max 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 ... 255.00000 255.000000 255.000000 255.000000 255.000000 255.000000 255.000000 255.00000 255.000000 255.000000

8 rows × 300 columns


In [41]:
df_cars.head().plot(legend=False)


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3469c4f860>

In [45]:
df_noncars.tail().plot(legend=False)


Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3469444208>

In [51]:
df_cars_blue = df_cars.filter(regex='blue')
df_cars_blue.head().plot(legend=False)
df_cars_green = df_cars.filter(regex='green')
df_cars_green.head().plot(legend=False)
df_cars_red = df_cars.filter(regex='red')
df_cars_red.head().plot(legend=False)


Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3464942128>

In [52]:
df_noncars_blue = df_noncars.filter(regex='blue')
df_noncars_blue.tail().plot(legend=False)
df_noncars_green = df_noncars.filter(regex='green')
df_noncars_green.tail().plot(legend=False)
df_noncars_red = df_noncars.filter(regex='red')
df_noncars_red.tail().plot(legend=False)


Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f34644315c0>

In [64]:
import re
df_cars_green = df_cars_green.rename(columns=lambda x: re.sub('green','blue',x))
df_cars_rgb = df_cars_blue.add(df_cars_green,fill_value=0)
df_cars_red = df_cars_red.rename(columns=lambda x: re.sub('red','blue',x))
df_cars_rgb = df_cars_rgb.add(df_cars_red,fill_value=0)
df_cars_rgb = df_cars_rgb.rename(columns=lambda x: re.sub('blue','rgb',x))
df_cars_rgb.head()


Out[64]:
rgb row 0 col 0 rgb row 0 col 1 rgb row 0 col 2 rgb row 0 col 3 rgb row 0 col 4 rgb row 0 col 5 rgb row 0 col 6 rgb row 0 col 7 rgb row 0 col 8 rgb row 0 col 9 ... rgb row 9 col 0 rgb row 9 col 1 rgb row 9 col 2 rgb row 9 col 3 rgb row 9 col 4 rgb row 9 col 5 rgb row 9 col 6 rgb row 9 col 7 rgb row 9 col 8 rgb row 9 col 9
0 712 539 518 566 445 381 528 531 429 413 ... 438 389 370 317 212 116 68 70 73 82
1 398 298 345 312 242 125 224 212 102 81 ... 578 584 544 107 141 344 478 508 523 496
2 517 574 550 226 136 166 234 599 560 515 ... 97 85 66 31 43 71 135 143 166 159
3 647 664 685 692 695 682 667 663 660 425 ... 539 523 343 217 184 178 200 240 443 395
4 556 569 572 577 615 631 606 621 647 664 ... 34 17 6 3 6 6 30 45 50 34

5 rows × 100 columns


In [66]:
df_cars_rgb.head().plot(legend=False)


Out[66]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f34641759e8>

In [68]:
df_noncars_green = df_noncars_green.rename(columns=lambda x: re.sub('green','blue',x))
df_noncars_rgb = df_noncars_blue.add(df_noncars_green,fill_value=0)
df_noncars_red = df_noncars_red.rename(columns=lambda x: re.sub('red','blue',x))
df_noncars_rgb = df_noncars_rgb.add(df_noncars_red,fill_value=0)
df_noncars_rgb = df_noncars_rgb.rename(columns=lambda x: re.sub('blue','rgb',x))
df_noncars_rgb.tail()


Out[68]:
rgb row 0 col 0 rgb row 0 col 1 rgb row 0 col 2 rgb row 0 col 3 rgb row 0 col 4 rgb row 0 col 5 rgb row 0 col 6 rgb row 0 col 7 rgb row 0 col 8 rgb row 0 col 9 ... rgb row 9 col 0 rgb row 9 col 1 rgb row 9 col 2 rgb row 9 col 3 rgb row 9 col 4 rgb row 9 col 5 rgb row 9 col 6 rgb row 9 col 7 rgb row 9 col 8 rgb row 9 col 9
33139 466 547 509 442 471 480 467 425 489 406 ... 303 299 305 347 248 296 238 238 300 349
33140 632 684 615 529 623 700 676 642 566 254 ... 350 361 359 357 371 380 332 309 282 244
33141 488 431 421 409 406 403 386 376 297 370 ... 504 412 476 409 393 369 413 451 443 434
33142 572 689 680 435 446 615 622 457 414 474 ... 494 498 373 264 260 250 253 436 449 408
33143 571 549 455 718 556 632 728 657 677 665 ... 282 291 302 251 285 279 302 283 302 304

5 rows × 100 columns


In [71]:
df_noncars_rgb.tail().plot(legend=False)



In [155]:
df_cars_rgb_single = df_cars_rgb.iloc[0]
row0,row1,row2,row3,row4,row5,row6,row7,row8,row9 = ([] for x in range(10))

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 0'):
                row0.append(value)
                
for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 1'):
                row1.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 2'):
                row2.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 3'):
                row3.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 4'):
                row4.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 5'):
                row5.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 6'):
                row6.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 7'):
                row7.append(value)

for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 8'):
                row8.append(value)
                
for key, value in df_cars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 9'):
                row9.append(value)

df_cars_rgb_rows = pd.DataFrame(
    {'row0': row0,
     'row1': row1,
     'row3': row3,
     'row4': row4,
     'row5': row5,
     'row6': row6,
     'row7': row7,
     'row8': row8,
     'row9': row9
    })

df_cars_rgb_rows.plot(legend=False)


Out[155]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f34479b0940>

In [147]:
df_noncars_rgb_single = df_noncars_rgb.iloc[0]
row0,row1,row2,row3,row4,row5,row6,row7,row8,row9 = ([] for x in range(10))

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 0'):
                row0.append(value)
                
for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 1'):
                row1.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 2'):
                row2.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 3'):
                row3.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 4'):
                row4.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 5'):
                row5.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 6'):
                row6.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 7'):
                row7.append(value)

for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 8'):
                row8.append(value)
                
for key, value in df_noncars_rgb_single.iteritems():   # iter on both keys and values
        if key.startswith('rgb row 9'):
                row9.append(value)

df_noncars_rgb_rows = pd.DataFrame(
    {'row0': row0,
     'row1': row1,
     'row3': row3,
     'row4': row4,
     'row5': row5,
     'row6': row6,
     'row7': row7,
     'row8': row8,
     'row9': row9
    })

df_noncars_rgb_rows.plot(legend=False)


Out[147]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3498cfc048>

In [150]:
df_noncars_rgb_rows.describe()


Out[150]:
row0 row1 row3 row4 row5 row6 row7 row8 row9
count 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
mean 269.700000 308.400000 292.700000 322.600000 355.200000 335.200000 278.600000 254.200000 205.800000
std 69.060441 91.852781 140.729883 143.043272 137.074027 156.961991 109.460495 108.013168 70.497912
min 132.000000 104.000000 57.000000 65.000000 179.000000 114.000000 105.000000 127.000000 73.000000
25% 234.250000 275.250000 199.250000 282.000000 215.750000 193.750000 253.750000 182.500000 175.750000
50% 291.000000 329.000000 304.000000 315.000000 391.500000 385.000000 288.000000 206.500000 192.500000
75% 318.750000 375.250000 404.250000 442.750000 462.250000 454.500000 312.250000 340.500000 238.000000
max 338.000000 393.000000 477.000000 479.000000 555.000000 561.000000 506.000000 423.000000 336.000000

In [154]:
df_cars_rgb_rows.describe()


Out[154]:
row0 row1 row3 row4 row5 row6 row7 row8 row9
count 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
mean 506.200000 515.100000 374.600000 317.800000 319.300000 352.900000 276.200000 180.400000 213.500000
std 95.627518 83.682535 175.998864 82.838263 82.721823 75.484288 92.861665 137.383648 150.815745
min 381.000000 367.000000 154.000000 234.000000 220.000000 263.000000 169.000000 58.000000 68.000000
25% 433.000000 464.750000 195.250000 256.750000 278.500000 314.500000 204.500000 71.500000 75.250000
50% 523.000000 544.500000 418.000000 300.000000 285.500000 321.000000 258.500000 120.000000 164.000000
75% 537.000000 558.750000 529.500000 343.500000 384.500000 377.500000 309.000000 252.750000 356.750000
max 712.000000 654.000000 562.000000 499.000000 447.000000 517.000000 436.000000 438.000000 438.000000

In [ ]: