Extracting Location:
Palm Beach Gardens, FL
Wisconsin, USA
Fishers, IN
Filtering tweets based on
"lang":"en""country":"United States" "possibly_sensitive":"false"mysql> create table ElectionTweets (id_str CHAR(18) PRIMARY KEY, month INT(2), day INT(2), loc_name VARCHAR(20), text VARCHAR(140));
Query OK, 0 rows affected (0.00 sec)
mysql> describe ElectionTweets;
+----------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+----------+--------------+------+-----+---------+-------+
| id_str | char(18) | NO | PRI | NULL | |
| month | int(2) | YES | | NULL | |
| day | int(2) | YES | | NULL | |
| loc_name | varchar(20) | YES | | NULL | |
| text | varchar(140) | YES | | NULL | |
+----------+--------------+------+-----+---------+-------+
5 rows in set (0.00 sec)
Handling the time of tweets:
# function to read from database
def process_locations():
conn = MySQLdb.connect (host = "localhost",
user = "USERNAME",
passwd = "PASSWORD",
db = "DATABASE")
cursor = conn.cursor ()
cursor.execute ("SELECT loc_name, count(*) as count FROM ElectionTweets GROUP BY loc_name;")
state_dict = {}
result_set = cursor.fetchall()
for row in result_set:
state_code = False
loc_split = row[0].strip().split(' ')
if (len(loc_split)==2):
if (len(loc_split[1]) == 2):
state_code = loc_split[1]
if (state_code):
if (state_code in state_dict):
state_dict[state_code] += int(row[1])
else:
state_dict[state_code] = int(row[1])
cursor.close ()
conn.close ()
return(state_dict)
In [ ]: