In [52]:
import re
# Pattern for a run of ASCII digits; search() returns the first such run on a line.
r = re.compile('[0-9]+')
solarradiation = []
with open('20160707.html','r') as page:
    for row in page:
        # Only table cells that carry the w/m unit label are of interest.
        if not ('data-cell' in row and 'w/m' in row):
            continue
        print (row)
        # The first digit run on the row is stored as an int reading.
        reading = r.search(row)
        solarradiation.append(int(reading.group()))
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">1 <span class="table-unit">w/m²</span></td>
<td class="data-cell">2 <span class="table-unit">w/m²</span></td>
<td class="data-cell">3 <span class="table-unit">w/m²</span></td>
<td class="data-cell">5 <span class="table-unit">w/m²</span></td>
<td class="data-cell">8 <span class="table-unit">w/m²</span></td>
<td class="data-cell">12 <span class="table-unit">w/m²</span></td>
<td class="data-cell">22 <span class="table-unit">w/m²</span></td>
<td class="data-cell">30 <span class="table-unit">w/m²</span></td>
<td class="data-cell">37 <span class="table-unit">w/m²</span></td>
<td class="data-cell">56 <span class="table-unit">w/m²</span></td>
<td class="data-cell">67 <span class="table-unit">w/m²</span></td>
<td class="data-cell">75 <span class="table-unit">w/m²</span></td>
<td class="data-cell">82 <span class="table-unit">w/m²</span></td>
<td class="data-cell">102 <span class="table-unit">w/m²</span></td>
<td class="data-cell">109 <span class="table-unit">w/m²</span></td>
<td class="data-cell">115 <span class="table-unit">w/m²</span></td>
<td class="data-cell">124 <span class="table-unit">w/m²</span></td>
<td class="data-cell">133 <span class="table-unit">w/m²</span></td>
<td class="data-cell">143 <span class="table-unit">w/m²</span></td>
<td class="data-cell">165 <span class="table-unit">w/m²</span></td>
<td class="data-cell">177 <span class="table-unit">w/m²</span></td>
<td class="data-cell">193 <span class="table-unit">w/m²</span></td>
<td class="data-cell">200 <span class="table-unit">w/m²</span></td>
<td class="data-cell">204 <span class="table-unit">w/m²</span></td>
<td class="data-cell">224 <span class="table-unit">w/m²</span></td>
<td class="data-cell">233 <span class="table-unit">w/m²</span></td>
<td class="data-cell">241 <span class="table-unit">w/m²</span></td>
<td class="data-cell">256 <span class="table-unit">w/m²</span></td>
<td class="data-cell">266 <span class="table-unit">w/m²</span></td>
<td class="data-cell">274 <span class="table-unit">w/m²</span></td>
<td class="data-cell">292 <span class="table-unit">w/m²</span></td>
<td class="data-cell">304 <span class="table-unit">w/m²</span></td>
<td class="data-cell">314 <span class="table-unit">w/m²</span></td>
<td class="data-cell">335 <span class="table-unit">w/m²</span></td>
<td class="data-cell">318 <span class="table-unit">w/m²</span></td>
<td class="data-cell">343 <span class="table-unit">w/m²</span></td>
<td class="data-cell">346 <span class="table-unit">w/m²</span></td>
<td class="data-cell">320 <span class="table-unit">w/m²</span></td>
<td class="data-cell">351 <span class="table-unit">w/m²</span></td>
<td class="data-cell">322 <span class="table-unit">w/m²</span></td>
<td class="data-cell">375 <span class="table-unit">w/m²</span></td>
<td class="data-cell">401 <span class="table-unit">w/m²</span></td>
<td class="data-cell">418 <span class="table-unit">w/m²</span></td>
<td class="data-cell">416 <span class="table-unit">w/m²</span></td>
<td class="data-cell">393 <span class="table-unit">w/m²</span></td>
<td class="data-cell">426 <span class="table-unit">w/m²</span></td>
<td class="data-cell">436 <span class="table-unit">w/m²</span></td>
<td class="data-cell">480 <span class="table-unit">w/m²</span></td>
<td class="data-cell">562 <span class="table-unit">w/m²</span></td>
<td class="data-cell">270 <span class="table-unit">w/m²</span></td>
<td class="data-cell">243 <span class="table-unit">w/m²</span></td>
<td class="data-cell">157 <span class="table-unit">w/m²</span></td>
<td class="data-cell">161 <span class="table-unit">w/m²</span></td>
<td class="data-cell">262 <span class="table-unit">w/m²</span></td>
<td class="data-cell">597 <span class="table-unit">w/m²</span></td>
<td class="data-cell">443 <span class="table-unit">w/m²</span></td>
<td class="data-cell">662 <span class="table-unit">w/m²</span></td>
<td class="data-cell">425 <span class="table-unit">w/m²</span></td>
<td class="data-cell">473 <span class="table-unit">w/m²</span></td>
<td class="data-cell">444 <span class="table-unit">w/m²</span></td>
<td class="data-cell">380 <span class="table-unit">w/m²</span></td>
<td class="data-cell">608 <span class="table-unit">w/m²</span></td>
<td class="data-cell">615 <span class="table-unit">w/m²</span></td>
<td class="data-cell">614 <span class="table-unit">w/m²</span></td>
<td class="data-cell">647 <span class="table-unit">w/m²</span></td>
<td class="data-cell">606 <span class="table-unit">w/m²</span></td>
<td class="data-cell">658 <span class="table-unit">w/m²</span></td>
<td class="data-cell">244 <span class="table-unit">w/m²</span></td>
<td class="data-cell">323 <span class="table-unit">w/m²</span></td>
<td class="data-cell">726 <span class="table-unit">w/m²</span></td>
<td class="data-cell">235 <span class="table-unit">w/m²</span></td>
<td class="data-cell">725 <span class="table-unit">w/m²</span></td>
<td class="data-cell">338 <span class="table-unit">w/m²</span></td>
<td class="data-cell">525 <span class="table-unit">w/m²</span></td>
<td class="data-cell">352 <span class="table-unit">w/m²</span></td>
<td class="data-cell">321 <span class="table-unit">w/m²</span></td>
<td class="data-cell">429 <span class="table-unit">w/m²</span></td>
<td class="data-cell">410 <span class="table-unit">w/m²</span></td>
<td class="data-cell">292 <span class="table-unit">w/m²</span></td>
<td class="data-cell">217 <span class="table-unit">w/m²</span></td>
<td class="data-cell">165 <span class="table-unit">w/m²</span></td>
<td class="data-cell">165 <span class="table-unit">w/m²</span></td>
<td class="data-cell">282 <span class="table-unit">w/m²</span></td>
<td class="data-cell">178 <span class="table-unit">w/m²</span></td>
<td class="data-cell">158 <span class="table-unit">w/m²</span></td>
<td class="data-cell">239 <span class="table-unit">w/m²</span></td>
<td class="data-cell">359 <span class="table-unit">w/m²</span></td>
<td class="data-cell">173 <span class="table-unit">w/m²</span></td>
<td class="data-cell">209 <span class="table-unit">w/m²</span></td>
<td class="data-cell">704 <span class="table-unit">w/m²</span></td>
<td class="data-cell">770 <span class="table-unit">w/m²</span></td>
<td class="data-cell">618 <span class="table-unit">w/m²</span></td>
<td class="data-cell">1070 <span class="table-unit">w/m²</span></td>
<td class="data-cell">459 <span class="table-unit">w/m²</span></td>
<td class="data-cell">373 <span class="table-unit">w/m²</span></td>
<td class="data-cell">399 <span class="table-unit">w/m²</span></td>
<td class="data-cell">328 <span class="table-unit">w/m²</span></td>
<td class="data-cell">369 <span class="table-unit">w/m²</span></td>
<td class="data-cell">942 <span class="table-unit">w/m²</span></td>
<td class="data-cell">930 <span class="table-unit">w/m²</span></td>
<td class="data-cell">842 <span class="table-unit">w/m²</span></td>
<td class="data-cell">176 <span class="table-unit">w/m²</span></td>
<td class="data-cell">54 <span class="table-unit">w/m²</span></td>
<td class="data-cell">30 <span class="table-unit">w/m²</span></td>
<td class="data-cell">40 <span class="table-unit">w/m²</span></td>
<td class="data-cell">68 <span class="table-unit">w/m²</span></td>
<td class="data-cell">96 <span class="table-unit">w/m²</span></td>
<td class="data-cell">132 <span class="table-unit">w/m²</span></td>
<td class="data-cell">221 <span class="table-unit">w/m²</span></td>
<td class="data-cell">245 <span class="table-unit">w/m²</span></td>
<td class="data-cell">334 <span class="table-unit">w/m²</span></td>
<td class="data-cell">248 <span class="table-unit">w/m²</span></td>
<td class="data-cell">167 <span class="table-unit">w/m²</span></td>
<td class="data-cell">293 <span class="table-unit">w/m²</span></td>
<td class="data-cell">547 <span class="table-unit">w/m²</span></td>
<td class="data-cell">561 <span class="table-unit">w/m²</span></td>
<td class="data-cell">481 <span class="table-unit">w/m²</span></td>
<td class="data-cell">387 <span class="table-unit">w/m²</span></td>
<td class="data-cell">453 <span class="table-unit">w/m²</span></td>
<td class="data-cell">591 <span class="table-unit">w/m²</span></td>
<td class="data-cell">431 <span class="table-unit">w/m²</span></td>
<td class="data-cell">428 <span class="table-unit">w/m²</span></td>
<td class="data-cell">264 <span class="table-unit">w/m²</span></td>
<td class="data-cell">254 <span class="table-unit">w/m²</span></td>
<td class="data-cell">182 <span class="table-unit">w/m²</span></td>
<td class="data-cell">126 <span class="table-unit">w/m²</span></td>
<td class="data-cell">123 <span class="table-unit">w/m²</span></td>
<td class="data-cell">117 <span class="table-unit">w/m²</span></td>
<td class="data-cell">117 <span class="table-unit">w/m²</span></td>
<td class="data-cell">150 <span class="table-unit">w/m²</span></td>
<td class="data-cell">143 <span class="table-unit">w/m²</span></td>
<td class="data-cell">225 <span class="table-unit">w/m²</span></td>
<td class="data-cell">182 <span class="table-unit">w/m²</span></td>
<td class="data-cell">187 <span class="table-unit">w/m²</span></td>
<td class="data-cell">188 <span class="table-unit">w/m²</span></td>
<td class="data-cell">205 <span class="table-unit">w/m²</span></td>
<td class="data-cell">261 <span class="table-unit">w/m²</span></td>
<td class="data-cell">258 <span class="table-unit">w/m²</span></td>
<td class="data-cell">143 <span class="table-unit">w/m²</span></td>
<td class="data-cell">85 <span class="table-unit">w/m²</span></td>
<td class="data-cell">70 <span class="table-unit">w/m²</span></td>
<td class="data-cell">51 <span class="table-unit">w/m²</span></td>
<td class="data-cell">39 <span class="table-unit">w/m²</span></td>
<td class="data-cell">33 <span class="table-unit">w/m²</span></td>
<td class="data-cell">27 <span class="table-unit">w/m²</span></td>
<td class="data-cell">41 <span class="table-unit">w/m²</span></td>
<td class="data-cell">44 <span class="table-unit">w/m²</span></td>
<td class="data-cell">44 <span class="table-unit">w/m²</span></td>
<td class="data-cell">46 <span class="table-unit">w/m²</span></td>
<td class="data-cell">38 <span class="table-unit">w/m²</span></td>
<td class="data-cell">39 <span class="table-unit">w/m²</span></td>
<td class="data-cell">41 <span class="table-unit">w/m²</span></td>
<td class="data-cell">41 <span class="table-unit">w/m²</span></td>
<td class="data-cell">51 <span class="table-unit">w/m²</span></td>
<td class="data-cell">66 <span class="table-unit">w/m²</span></td>
<td class="data-cell">65 <span class="table-unit">w/m²</span></td>
<td class="data-cell">61 <span class="table-unit">w/m²</span></td>
<td class="data-cell">56 <span class="table-unit">w/m²</span></td>
<td class="data-cell">51 <span class="table-unit">w/m²</span></td>
<td class="data-cell">46 <span class="table-unit">w/m²</span></td>
<td class="data-cell">31 <span class="table-unit">w/m²</span></td>
<td class="data-cell">29 <span class="table-unit">w/m²</span></td>
<td class="data-cell">27 <span class="table-unit">w/m²</span></td>
<td class="data-cell">24 <span class="table-unit">w/m²</span></td>
<td class="data-cell">24 <span class="table-unit">w/m²</span></td>
<td class="data-cell">20 <span class="table-unit">w/m²</span></td>
<td class="data-cell">16 <span class="table-unit">w/m²</span></td>
<td class="data-cell">14 <span class="table-unit">w/m²</span></td>
<td class="data-cell">11 <span class="table-unit">w/m²</span></td>
<td class="data-cell">7 <span class="table-unit">w/m²</span></td>
<td class="data-cell">3 <span class="table-unit">w/m²</span></td>
<td class="data-cell">2 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
<td class="data-cell">0 <span class="table-unit">w/m²</span></td>
In [53]:
# Pattern for a 12-hour clock string such as "1:05 PM" or "12:40 AM".
t = re.compile('[0-9]{1,2}:[0-9]{2} [AP]M')
times = []
with open('20160707.html','r') as page:
    for row in page:
        if 'heading-cell' not in row:
            continue
        # Search once and reuse the match; heading cells without a time are ignored.
        stamp = t.search(row)
        if stamp is None:
            continue
        print (row)
        times.append(stamp.group())
<td class="heading-cell">12:03 AM</td>
<td class="heading-cell">12:08 AM</td>
<td class="heading-cell">12:14 AM</td>
<td class="heading-cell">12:19 AM</td>
<td class="heading-cell">12:24 AM</td>
<td class="heading-cell">12:29 AM</td>
<td class="heading-cell">12:35 AM</td>
<td class="heading-cell">12:40 AM</td>
<td class="heading-cell">12:45 AM</td>
<td class="heading-cell">12:51 AM</td>
<td class="heading-cell">12:56 AM</td>
<td class="heading-cell">1:01 AM</td>
<td class="heading-cell">1:06 AM</td>
<td class="heading-cell">1:12 AM</td>
<td class="heading-cell">1:17 AM</td>
<td class="heading-cell">1:22 AM</td>
<td class="heading-cell">1:27 AM</td>
<td class="heading-cell">1:33 AM</td>
<td class="heading-cell">1:38 AM</td>
<td class="heading-cell">1:43 AM</td>
<td class="heading-cell">1:49 AM</td>
<td class="heading-cell">1:54 AM</td>
<td class="heading-cell">1:59 AM</td>
<td class="heading-cell">2:04 AM</td>
<td class="heading-cell">2:10 AM</td>
<td class="heading-cell">2:15 AM</td>
<td class="heading-cell">2:20 AM</td>
<td class="heading-cell">2:27 AM</td>
<td class="heading-cell">2:32 AM</td>
<td class="heading-cell">2:37 AM</td>
<td class="heading-cell">2:42 AM</td>
<td class="heading-cell">2:48 AM</td>
<td class="heading-cell">2:53 AM</td>
<td class="heading-cell">2:58 AM</td>
<td class="heading-cell">3:03 AM</td>
<td class="heading-cell">3:09 AM</td>
<td class="heading-cell">3:14 AM</td>
<td class="heading-cell">3:19 AM</td>
<td class="heading-cell">3:24 AM</td>
<td class="heading-cell">3:30 AM</td>
<td class="heading-cell">3:35 AM</td>
<td class="heading-cell">3:40 AM</td>
<td class="heading-cell">3:46 AM</td>
<td class="heading-cell">3:51 AM</td>
<td class="heading-cell">3:56 AM</td>
<td class="heading-cell">4:01 AM</td>
<td class="heading-cell">4:07 AM</td>
<td class="heading-cell">4:12 AM</td>
<td class="heading-cell">4:17 AM</td>
<td class="heading-cell">4:22 AM</td>
<td class="heading-cell">4:28 AM</td>
<td class="heading-cell">4:33 AM</td>
<td class="heading-cell">4:38 AM</td>
<td class="heading-cell">4:43 AM</td>
<td class="heading-cell">4:49 AM</td>
<td class="heading-cell">4:54 AM</td>
<td class="heading-cell">4:59 AM</td>
<td class="heading-cell">5:05 AM</td>
<td class="heading-cell">5:10 AM</td>
<td class="heading-cell">5:15 AM</td>
<td class="heading-cell">5:20 AM</td>
<td class="heading-cell">5:26 AM</td>
<td class="heading-cell">5:31 AM</td>
<td class="heading-cell">5:36 AM</td>
<td class="heading-cell">5:41 AM</td>
<td class="heading-cell">5:47 AM</td>
<td class="heading-cell">5:52 AM</td>
<td class="heading-cell">5:57 AM</td>
<td class="heading-cell">6:02 AM</td>
<td class="heading-cell">6:08 AM</td>
<td class="heading-cell">6:13 AM</td>
<td class="heading-cell">6:18 AM</td>
<td class="heading-cell">6:24 AM</td>
<td class="heading-cell">6:29 AM</td>
<td class="heading-cell">6:34 AM</td>
<td class="heading-cell">6:39 AM</td>
<td class="heading-cell">6:45 AM</td>
<td class="heading-cell">6:50 AM</td>
<td class="heading-cell">6:55 AM</td>
<td class="heading-cell">7:02 AM</td>
<td class="heading-cell">7:07 AM</td>
<td class="heading-cell">7:12 AM</td>
<td class="heading-cell">7:18 AM</td>
<td class="heading-cell">7:23 AM</td>
<td class="heading-cell">7:28 AM</td>
<td class="heading-cell">7:33 AM</td>
<td class="heading-cell">7:39 AM</td>
<td class="heading-cell">7:44 AM</td>
<td class="heading-cell">7:49 AM</td>
<td class="heading-cell">7:55 AM</td>
<td class="heading-cell">8:01 AM</td>
<td class="heading-cell">8:06 AM</td>
<td class="heading-cell">8:11 AM</td>
<td class="heading-cell">8:17 AM</td>
<td class="heading-cell">8:22 AM</td>
<td class="heading-cell">8:27 AM</td>
<td class="heading-cell">8:33 AM</td>
<td class="heading-cell">8:38 AM</td>
<td class="heading-cell">8:43 AM</td>
<td class="heading-cell">8:48 AM</td>
<td class="heading-cell">8:54 AM</td>
<td class="heading-cell">8:59 AM</td>
<td class="heading-cell">9:04 AM</td>
<td class="heading-cell">9:10 AM</td>
<td class="heading-cell">9:15 AM</td>
<td class="heading-cell">9:20 AM</td>
<td class="heading-cell">9:25 AM</td>
<td class="heading-cell">9:31 AM</td>
<td class="heading-cell">9:36 AM</td>
<td class="heading-cell">9:41 AM</td>
<td class="heading-cell">9:46 AM</td>
<td class="heading-cell">9:52 AM</td>
<td class="heading-cell">9:57 AM</td>
<td class="heading-cell">10:02 AM</td>
<td class="heading-cell">10:07 AM</td>
<td class="heading-cell">10:13 AM</td>
<td class="heading-cell">10:18 AM</td>
<td class="heading-cell">10:23 AM</td>
<td class="heading-cell">10:29 AM</td>
<td class="heading-cell">10:34 AM</td>
<td class="heading-cell">10:39 AM</td>
<td class="heading-cell">10:44 AM</td>
<td class="heading-cell">10:50 AM</td>
<td class="heading-cell">10:55 AM</td>
<td class="heading-cell">11:00 AM</td>
<td class="heading-cell">11:05 AM</td>
<td class="heading-cell">11:11 AM</td>
<td class="heading-cell">11:16 AM</td>
<td class="heading-cell">11:21 AM</td>
<td class="heading-cell">11:26 AM</td>
<td class="heading-cell">11:32 AM</td>
<td class="heading-cell">11:37 AM</td>
<td class="heading-cell">11:42 AM</td>
<td class="heading-cell">11:48 AM</td>
<td class="heading-cell">11:54 AM</td>
<td class="heading-cell">11:59 AM</td>
<td class="heading-cell">12:06 PM</td>
<td class="heading-cell">12:11 PM</td>
<td class="heading-cell">12:16 PM</td>
<td class="heading-cell">12:22 PM</td>
<td class="heading-cell">12:28 PM</td>
<td class="heading-cell">12:33 PM</td>
<td class="heading-cell">12:39 PM</td>
<td class="heading-cell">12:44 PM</td>
<td class="heading-cell">12:49 PM</td>
<td class="heading-cell">12:54 PM</td>
<td class="heading-cell">1:00 PM</td>
<td class="heading-cell">1:05 PM</td>
<td class="heading-cell">1:10 PM</td>
<td class="heading-cell">1:16 PM</td>
<td class="heading-cell">1:21 PM</td>
<td class="heading-cell">1:26 PM</td>
<td class="heading-cell">1:32 PM</td>
<td class="heading-cell">1:37 PM</td>
<td class="heading-cell">1:42 PM</td>
<td class="heading-cell">1:48 PM</td>
<td class="heading-cell">1:53 PM</td>
<td class="heading-cell">1:58 PM</td>
<td class="heading-cell">2:04 PM</td>
<td class="heading-cell">2:09 PM</td>
<td class="heading-cell">2:14 PM</td>
<td class="heading-cell">2:19 PM</td>
<td class="heading-cell">2:25 PM</td>
<td class="heading-cell">2:30 PM</td>
<td class="heading-cell">2:35 PM</td>
<td class="heading-cell">2:40 PM</td>
<td class="heading-cell">2:46 PM</td>
<td class="heading-cell">2:51 PM</td>
<td class="heading-cell">2:56 PM</td>
<td class="heading-cell">3:02 PM</td>
<td class="heading-cell">3:07 PM</td>
<td class="heading-cell">3:12 PM</td>
<td class="heading-cell">3:23 PM</td>
<td class="heading-cell">3:28 PM</td>
<td class="heading-cell">3:33 PM</td>
<td class="heading-cell">3:38 PM</td>
<td class="heading-cell">3:44 PM</td>
<td class="heading-cell">3:49 PM</td>
<td class="heading-cell">3:54 PM</td>
<td class="heading-cell">4:00 PM</td>
<td class="heading-cell">4:05 PM</td>
<td class="heading-cell">4:10 PM</td>
<td class="heading-cell">4:15 PM</td>
<td class="heading-cell">4:21 PM</td>
<td class="heading-cell">4:26 PM</td>
<td class="heading-cell">4:31 PM</td>
<td class="heading-cell">4:36 PM</td>
<td class="heading-cell">4:42 PM</td>
<td class="heading-cell">4:47 PM</td>
<td class="heading-cell">4:52 PM</td>
<td class="heading-cell">4:57 PM</td>
<td class="heading-cell">5:03 PM</td>
<td class="heading-cell">5:08 PM</td>
<td class="heading-cell">5:13 PM</td>
<td class="heading-cell">5:19 PM</td>
<td class="heading-cell">5:24 PM</td>
<td class="heading-cell">5:29 PM</td>
<td class="heading-cell">5:34 PM</td>
<td class="heading-cell">5:40 PM</td>
<td class="heading-cell">5:45 PM</td>
<td class="heading-cell">5:50 PM</td>
<td class="heading-cell">5:56 PM</td>
<td class="heading-cell">6:01 PM</td>
<td class="heading-cell">6:06 PM</td>
<td class="heading-cell">6:11 PM</td>
<td class="heading-cell">6:17 PM</td>
<td class="heading-cell">6:22 PM</td>
<td class="heading-cell">6:27 PM</td>
<td class="heading-cell">6:32 PM</td>
<td class="heading-cell">6:38 PM</td>
<td class="heading-cell">6:43 PM</td>
<td class="heading-cell">6:48 PM</td>
<td class="heading-cell">6:54 PM</td>
<td class="heading-cell">6:59 PM</td>
<td class="heading-cell">7:04 PM</td>
<td class="heading-cell">7:09 PM</td>
<td class="heading-cell">7:15 PM</td>
<td class="heading-cell">7:20 PM</td>
<td class="heading-cell">7:25 PM</td>
<td class="heading-cell">7:30 PM</td>
<td class="heading-cell">7:36 PM</td>
<td class="heading-cell">7:41 PM</td>
<td class="heading-cell">7:46 PM</td>
<td class="heading-cell">7:51 PM</td>
<td class="heading-cell">7:57 PM</td>
<td class="heading-cell">8:02 PM</td>
<td class="heading-cell">8:07 PM</td>
<td class="heading-cell">8:13 PM</td>
<td class="heading-cell">8:18 PM</td>
<td class="heading-cell">8:23 PM</td>
<td class="heading-cell">8:28 PM</td>
<td class="heading-cell">8:34 PM</td>
<td class="heading-cell">8:44 PM</td>
<td class="heading-cell">8:49 PM</td>
<td class="heading-cell">8:55 PM</td>
<td class="heading-cell">9:00 PM</td>
<td class="heading-cell">9:05 PM</td>
<td class="heading-cell">9:10 PM</td>
<td class="heading-cell">9:16 PM</td>
<td class="heading-cell">9:21 PM</td>
<td class="heading-cell">9:26 PM</td>
<td class="heading-cell">9:32 PM</td>
<td class="heading-cell">9:37 PM</td>
<td class="heading-cell">9:42 PM</td>
<td class="heading-cell">9:47 PM</td>
<td class="heading-cell">9:53 PM</td>
<td class="heading-cell">9:58 PM</td>
<td class="heading-cell">10:03 PM</td>
<td class="heading-cell">10:08 PM</td>
<td class="heading-cell">10:14 PM</td>
<td class="heading-cell">10:19 PM</td>
<td class="heading-cell">10:24 PM</td>
<td class="heading-cell">10:29 PM</td>
<td class="heading-cell">10:35 PM</td>
<td class="heading-cell">10:40 PM</td>
<td class="heading-cell">10:45 PM</td>
<td class="heading-cell">10:51 PM</td>
<td class="heading-cell">10:56 PM</td>
<td class="heading-cell">11:01 PM</td>
<td class="heading-cell">11:06 PM</td>
<td class="heading-cell">11:12 PM</td>
<td class="heading-cell">11:17 PM</td>
<td class="heading-cell">11:22 PM</td>
<td class="heading-cell">11:27 PM</td>
<td class="heading-cell">11:33 PM</td>
<td class="heading-cell">11:38 PM</td>
<td class="heading-cell">11:43 PM</td>
<td class="heading-cell">11:54 PM</td>
<td class="heading-cell">11:59 PM</td>
In [47]:
# Display the contents of the `times` list.
# NOTE(review): this cell's execution count (47) is lower than that of the cell
# that builds `times` (53), so the saved output may come from an earlier kernel
# state -- re-run after extraction before relying on it.
print(times)
['12:04 AM', '12:10 AM', '12:15 AM', '12:20 AM', '12:25 AM', '12:31 AM', '12:36 AM', '12:41 AM', '12:46 AM', '12:52 AM', '12:57 AM', '1:02 AM', '1:08 AM', '1:13 AM', '1:18 AM', '1:23 AM', '1:29 AM', '1:34 AM', '1:39 AM', '1:44 AM', '1:50 AM', '1:55 AM', '2:05 AM', '2:11 AM', '2:16 AM', '2:21 AM', '2:27 AM', '2:32 AM', '2:37 AM', '2:42 AM', '2:48 AM', '2:53 AM', '2:58 AM', '3:03 AM', '3:09 AM', '3:14 AM', '3:19 AM', '3:24 AM', '3:30 AM', '3:35 AM', '3:40 AM', '3:46 AM', '3:51 AM', '3:56 AM', '4:01 AM', '4:07 AM', '4:12 AM', '4:17 AM', '4:22 AM', '4:28 AM', '4:33 AM', '4:38 AM', '4:43 AM', '4:49 AM', '4:54 AM', '4:59 AM', '5:05 AM', '5:10 AM', '5:15 AM', '5:20 AM', '5:26 AM', '5:31 AM', '5:36 AM', '5:41 AM', '5:47 AM', '5:52 AM', '5:57 AM', '6:02 AM', '6:08 AM', '6:13 AM', '6:18 AM', '6:24 AM', '6:29 AM', '6:34 AM', '6:39 AM', '6:45 AM', '6:50 AM', '6:55 AM', '7:00 AM', '7:06 AM', '7:11 AM', '7:16 AM', '7:23 AM', '7:28 AM', '7:34 AM', '7:39 AM', '7:44 AM', '7:50 AM', '7:55 AM', '8:01 AM', '8:07 AM', '8:13 AM', '8:19 AM', '8:24 AM', '8:29 AM', '8:35 AM', '8:40 AM', '8:45 AM', '8:50 AM', '8:56 AM', '9:01 AM', '9:06 AM', '9:12 AM', '9:17 AM', '9:22 AM', '9:27 AM', '9:33 AM', '9:38 AM', '9:43 AM', '9:48 AM', '9:54 AM', '9:59 AM', '10:04 AM', '10:09 AM', '10:15 AM', '10:20 AM', '10:25 AM', '10:31 AM', '10:36 AM', '10:41 AM', '10:46 AM', '10:52 AM', '10:57 AM', '11:02 AM', '11:08 AM', '11:13 AM', '11:18 AM', '11:24 AM', '11:29 AM', '11:34 AM', '11:40 AM', '11:45 AM', '11:50 AM', '11:55 AM', '12:01 PM', '12:06 PM', '12:11 PM', '12:17 PM', '12:22 PM', '12:27 PM', '12:33 PM', '12:44 PM', '12:49 PM', '12:55 PM', '1:00 PM', '1:05 PM', '1:10 PM', '1:16 PM', '1:21 PM', '1:27 PM', '1:32 PM', '1:37 PM', '1:43 PM', '1:48 PM', '1:53 PM', '1:58 PM', '2:04 PM', '2:09 PM', '2:14 PM', '2:20 PM', '2:25 PM', '2:31 PM', '2:39 PM', '2:44 PM', '2:56 PM', '3:01 PM', '3:09 PM', '3:14 PM', '3:20 PM', '3:25 PM', '3:30 PM', '3:35 PM', '3:49 PM', '3:56 PM', '4:01 PM', '4:06 PM', '4:11 PM', '4:17 
PM', '4:22 PM', '4:27 PM', '4:35 PM', '4:41 PM', '4:55 PM', '5:03 PM', '5:08 PM', '5:13 PM', '5:18 PM', '5:24 PM', '5:29 PM', '5:34 PM', '5:40 PM', '5:45 PM', '5:50 PM', '5:55 PM', '6:01 PM', '6:06 PM', '6:11 PM', '6:16 PM', '6:22 PM', '6:27 PM', '6:32 PM', '6:38 PM', '6:43 PM', '6:48 PM', '6:57 PM', '7:03 PM', '7:09 PM', '7:14 PM', '7:19 PM', '7:24 PM', '7:30 PM', '7:35 PM', '7:40 PM', '7:46 PM', '7:51 PM', '7:56 PM', '8:01 PM', '8:07 PM', '8:12 PM', '8:17 PM', '8:22 PM', '8:28 PM', '8:33 PM', '8:38 PM', '8:43 PM', '8:49 PM', '8:54 PM', '8:59 PM', '9:05 PM', '9:10 PM', '9:15 PM', '9:20 PM', '9:26 PM', '9:31 PM', '9:36 PM', '9:41 PM', '9:47 PM', '9:52 PM', '9:57 PM', '10:03 PM', '10:08 PM', '10:13 PM', '10:18 PM', '10:24 PM', '10:29 PM', '10:34 PM', '10:39 PM', '10:45 PM', '10:50 PM', '10:55 PM', '11:01 PM', '11:06 PM', '11:11 PM', '11:16 PM', '11:22 PM', '11:27 PM', '11:32 PM', '11:37 PM', '11:43 PM', '11:48 PM', '11:53 PM', '11:59 PM', '12:04 AM', '12:10 AM', '12:15 AM', '12:20 AM', '12:25 AM', '12:31 AM', '12:36 AM', '12:41 AM', '12:46 AM', '12:52 AM', '12:57 AM', '1:02 AM', '1:08 AM', '1:13 AM', '1:18 AM', '1:23 AM', '1:29 AM', '1:34 AM', '1:39 AM', '1:44 AM', '1:50 AM', '1:55 AM', '2:05 AM', '2:11 AM', '2:16 AM', '2:21 AM', '2:27 AM', '2:32 AM', '2:37 AM', '2:42 AM', '2:48 AM', '2:53 AM', '2:58 AM', '3:03 AM', '3:09 AM', '3:14 AM', '3:19 AM', '3:24 AM', '3:30 AM', '3:35 AM', '3:40 AM', '3:46 AM', '3:51 AM', '3:56 AM', '4:01 AM', '4:07 AM', '4:12 AM', '4:17 AM', '4:22 AM', '4:28 AM', '4:33 AM', '4:38 AM', '4:43 AM', '4:49 AM', '4:54 AM', '4:59 AM', '5:05 AM', '5:10 AM', '5:15 AM', '5:20 AM', '5:26 AM', '5:31 AM', '5:36 AM', '5:41 AM', '5:47 AM', '5:52 AM', '5:57 AM', '6:02 AM', '6:08 AM', '6:13 AM', '6:18 AM', '6:24 AM', '6:29 AM', '6:34 AM', '6:39 AM', '6:45 AM', '6:50 AM', '6:55 AM', '7:00 AM', '7:06 AM', '7:11 AM', '7:16 AM', '7:23 AM', '7:28 AM', '7:34 AM', '7:39 AM', '7:44 AM', '7:50 AM', '7:55 AM', '8:01 AM', '8:07 AM', '8:13 AM', '8:19 AM', '8:24 AM', 
'8:29 AM', '8:35 AM', '8:40 AM', '8:45 AM', '8:50 AM', '8:56 AM', '9:01 AM', '9:06 AM', '9:12 AM', '9:17 AM', '9:22 AM', '9:27 AM', '9:33 AM', '9:38 AM', '9:43 AM', '9:48 AM', '9:54 AM', '9:59 AM', '10:04 AM', '10:09 AM', '10:15 AM', '10:20 AM', '10:25 AM', '10:31 AM', '10:36 AM', '10:41 AM', '10:46 AM', '10:52 AM', '10:57 AM', '11:02 AM', '11:08 AM', '11:13 AM', '11:18 AM', '11:24 AM', '11:29 AM', '11:34 AM', '11:40 AM', '11:45 AM', '11:50 AM', '11:55 AM', '12:01 PM', '12:06 PM', '12:11 PM', '12:17 PM', '12:22 PM', '12:27 PM', '12:33 PM', '12:44 PM', '12:49 PM', '12:55 PM', '1:00 PM', '1:05 PM', '1:10 PM', '1:16 PM', '1:21 PM', '1:27 PM', '1:32 PM', '1:37 PM', '1:43 PM', '1:48 PM', '1:53 PM', '1:58 PM', '2:04 PM', '2:09 PM', '2:14 PM', '2:20 PM', '2:25 PM', '2:31 PM', '2:39 PM', '2:44 PM', '2:56 PM', '3:01 PM', '3:09 PM', '3:14 PM', '3:20 PM', '3:25 PM', '3:30 PM', '3:35 PM', '3:49 PM', '3:56 PM', '4:01 PM', '4:06 PM', '4:11 PM', '4:17 PM', '4:22 PM', '4:27 PM', '4:35 PM', '4:41 PM', '4:55 PM', '5:03 PM', '5:08 PM', '5:13 PM', '5:18 PM', '5:24 PM', '5:29 PM', '5:34 PM', '5:40 PM', '5:45 PM', '5:50 PM', '5:55 PM', '6:01 PM', '6:06 PM', '6:11 PM', '6:16 PM', '6:22 PM', '6:27 PM', '6:32 PM', '6:38 PM', '6:43 PM', '6:48 PM', '6:57 PM', '7:03 PM', '7:09 PM', '7:14 PM', '7:19 PM', '7:24 PM', '7:30 PM', '7:35 PM', '7:40 PM', '7:46 PM', '7:51 PM', '7:56 PM', '8:01 PM', '8:07 PM', '8:12 PM', '8:17 PM', '8:22 PM', '8:28 PM', '8:33 PM', '8:38 PM', '8:43 PM', '8:49 PM', '8:54 PM', '8:59 PM', '9:05 PM', '9:10 PM', '9:15 PM', '9:20 PM', '9:26 PM', '9:31 PM', '9:36 PM', '9:41 PM', '9:47 PM', '9:52 PM', '9:57 PM', '10:03 PM', '10:08 PM', '10:13 PM', '10:18 PM', '10:24 PM', '10:29 PM', '10:34 PM', '10:39 PM', '10:45 PM', '10:50 PM', '10:55 PM', '11:01 PM', '11:06 PM', '11:11 PM', '11:16 PM', '11:22 PM', '11:27 PM', '11:32 PM', '11:37 PM', '11:43 PM', '11:48 PM', '11:53 PM', '11:59 PM']
In [54]:
# Sanity check: print the number of extracted times, then the number of
# extracted radiation readings; the two are expected to match so the lists
# can be paired one-to-one.
for extracted in (times, solarradiation):
    print(len(extracted))
269
269
In [37]:
#for each date extract solar radiation data from the html file
Out[37]:
False
In [2]:
import os
import datetime as dt
import time
import re
# Walk every day from startdate to enddate (inclusive), read that day's saved
# page "<YYYYMMDD>.html", pull out the time strings and solar radiation values,
# and append them to the file 'histsolar' as "<date> <time>,<radiation>" lines.
# NOTE: 'histsolar' is opened in append mode, so re-running this cell adds the
# same records again.
startdate = dt.date(2016,7,7)
enddate = dt.date(2016,9,15)
r = re.compile('[0-9]+')#regular expression to extract integer string
t = re.compile('[0-9]{1,2}:[0-9]{2} [AP]M')#regular expression to extract time string
curdate = startdate
while curdate <= enddate:
    solarradiation = []
    times = []
    datestr = str(curdate).replace('-','')  # 'YYYY-MM-DD' -> 'YYYYMMDD'
    filename = datestr + '.html'
    with open(filename,'r') as html:
        # The values of interest can be recognised one line at a time.
        for line in html:
            # Radiation cell: keep the first digit run as a string.
            if 'data-cell' in line and 'w/m' in line:
                found = r.search(line)
                # A cell without digits is not recorded; the length check
                # below then rejects the whole day instead of crashing here.
                if found is not None:
                    solarradiation.append(found.group())
                continue
            # Time heading cell: keep the clock string.
            if 'heading-cell' in line:
                found = t.search(line)
                if found is not None:
                    times.append(found.group())
    if len(solarradiation) == len(times):
        # Sizes match, so we assume the data was extracted correctly and
        # each time lines up with its reading.
        with open('histsolar','a') as out:
            # Loop variable is not called `time`, which would rebind the
            # imported `time` module.
            for timestr,radiation in zip(times,solarradiation):
                out.write(str(curdate) + ' ' + timestr + ',' + radiation + '\n')
    else:
        print('times and radiation records have diffrent sizes for: ',filename,' ...skipping...')
    # Always advance to the next day, including after a skipped file;
    # otherwise the loop would retry the same date forever.
    curdate += dt.timedelta(days=1)
In [ ]:
Content source: alejandro-mc/Ecollect
Similar notebooks: