Twython Approach to Scrape Tweets


In [4]:
#import twython and necessary modules for tweet manipulation
import os
import string
import sys

import simplejson
from twython import Twython

In [3]:
import datetime

# Take one snapshot of the current local time and unpack its calendar parts;
# the attributes are already ints, so no conversion is needed.
now = datetime.datetime.now()
year, month, day = now.year, now.month, now.day

In [4]:
# Create the Twython client. The four credentials come from the Twitter
# developer console and are read from environment variables rather than being
# written into the notebook, so the file can be shared or committed without
# redacting secrets. A variable that is not set raises KeyError naming it.
t = Twython(
    app_key=os.environ['TWITTER_APP_KEY'],
    app_secret=os.environ['TWITTER_APP_SECRET'],
    oauth_token=os.environ['TWITTER_OAUTH_TOKEN'],
    oauth_token_secret=os.environ['TWITTER_OAUTH_TOKEN_SECRET'],
)

In [5]:
# Twitter user ids to resolve with Twython's lookup_user call.
USER_IDS = [
    4816, 9715012, 13023422, 13393052, 14226882, 14235041, 14292458, 14335586,
    14730894, 15029174, 15474846, 15634728, 15689319, 15782399, 15946841,
    16116519, 16148677, 16223542, 16315120, 16566133, 16686673, 16801671,
    41900627, 42645839, 42731742, 44157002, 44988185, 48073289, 48827616,
    49702654, 50310311, 50361094,
]

# Join with bare commas so the parameter is exactly "id,id,...": no padding
# whitespace between ids and no empty element after a trailing comma.
ids = ",".join(str(user_id) for user_id in USER_IDS)

In [6]:
# Ask Twython for the profile objects matching the comma-separated id string.
users = t.lookup_user(user_id=ids)

In [7]:
# Output file name stamped with the run date as month.day.year.
date_parts = (now.month, now.day, now.year)
outfn = "twitter_user_data_%i.%i.%i.txt" % date_parts

In [8]:
# Column names of the report, in output order. Every name except
# 'expanded_url' is read directly from the user object under the same key.
fields = [
    "id",
    "screen_name",
    "name",
    "created_at",
    "url",
    "followers_count",
    "friends_count",
    "statuses_count",
    "favourites_count",
    "listed_count",
    "contributors_enabled",
    "description",
    "protected",
    "location",
    "lang",
    "expanded_url",
]

In [10]:
# Create the report file and write the tab-separated header row. The handle
# is left open here; it is closed once the user rows have been written.
outfp = open(outfn, "w")
header_row = "\t".join(fields)
outfp.write(header_row + "\n")

In [11]:
def _tsv_cell(value):
    """Render one profile value as a unicode cell that is safe inside a TSV row.

    None becomes an empty cell instead of the text "None". Escaped slashes
    (a backslash followed by "/") are unescaped. Tab, carriage-return and
    newline characters are each replaced by a space, so a multi-line
    description cannot split one user across several rows or columns.
    """
    if value is None:
        return u""
    cell = unicode(value).replace("\\/", "/")
    for separator in (u"\t", u"\r", u"\n"):
        cell = cell.replace(separator, u" ")
    return cell


# Every column except expanded_url is a top-level key of the user object.
direct_fields = [name for name in fields if name != 'expanded_url']

try:
    for entry in users:
        # Start with every column blank so a key absent from the response
        # yields an empty cell rather than an exception.
        r = dict.fromkeys(fields, "")
        for name in direct_fields:
            r[name] = entry.get(name, "")

        # The resolved profile link sits at entities -> url -> urls[0]; any
        # missing or empty level along that path leaves the column blank.
        url_entities = ((entry.get('entities') or {}).get('url') or {}).get('urls') or []
        r['expanded_url'] = url_entities[0].get('expanded_url', '') if url_entities else ''

        # A single parenthesised argument prints the same as `print r`.
        print(r)

        row = u"\t".join(_tsv_cell(r[name]) for name in fields)
        outfp.write(row.encode("utf-8") + "\n")
finally:
    # Close the file even when a record fails, so rows already written are
    # flushed to disk and the handle is not leaked.
    outfp.close()


{'lang': u'en', 'statuses_count': 11149, 'name': u'EFF', 'friends_count': 884, 'url': u'https://t.co/0UKsdjXnWw', 'created_at': u'Mon Aug 28 14:17:28 +0000 2006', 'description': u"We're the Electronic Frontier Foundation. We defend your civil liberties in a digital world.", 'contributors_enabled': False, 'followers_count': 308364, 'protected': False, 'expanded_url': u'https://www.eff.org', 'location': u'San Francisco, CA', 'favourites_count': 61, 'listed_count': 12821, 'id': 4816, 'screen_name': u'EFF'}
{'lang': u'en', 'statuses_count': 9903, 'name': u'PFAW', 'friends_count': 5206, 'url': u'http://t.co/XnA9YzPaGr', 'created_at': u'Fri Oct 26 17:14:21 +0000 2007', 'description': u"We're People For the American Way: a progressive non-profit org working for equal rights and constitutional liberties.", 'contributors_enabled': False, 'followers_count': 15421, 'protected': False, 'expanded_url': u'http://www.pfaw.org', 'location': u'Washington, DC', 'favourites_count': 130, 'listed_count': 895, 'id': 9715012, 'screen_name': u'peoplefor'}
{'lang': u'en', 'statuses_count': 3716, 'name': u'White House Project', 'friends_count': 578, 'url': u'http://t.co/7X0bJKscGf', 'created_at': u'Sun Feb 03 22:11:49 +0000 2008', 'description': u'TWHP ignites the leadership of women in business and politics. We connect, coach, and educate an ever-expanding alumnae network of over 14,000 women.', 'contributors_enabled': False, 'followers_count': 6821, 'protected': False, 'expanded_url': u'http://www.thewhitehouseproject.org/', 'location': u'New York, NY', 'favourites_count': 304, 'listed_count': 396, 'id': 13023422, 'screen_name': u'TWHP'}
{'lang': u'en', 'statuses_count': 23166, 'name': u'ACLU National', 'friends_count': 978, 'url': u'http://t.co/AF1gbNvLJ8', 'created_at': u'Tue Feb 12 16:27:34 +0000 2008', 'description': u'The ACLU is a nonprofit, nonpartisan, legal and advocacy organization devoted to protecting the basic civil liberties of everyone in America.', 'contributors_enabled': False, 'followers_count': 265452, 'protected': False, 'expanded_url': u'http://www.aclu.org', 'location': u'All 50 states ', 'favourites_count': 2983, 'listed_count': 8147, 'id': 13393052, 'screen_name': u'ACLU'}
{'lang': u'en', 'statuses_count': 22390, 'name': u'NCLR', 'friends_count': 10028, 'url': u'http://t.co/F1x2vX77GE', 'created_at': u'Wed Mar 26 15:54:50 +0000 2008', 'description': u'The largest national Latino civil rights and advocacy organization in the United States, NCLR works to improve opportunities for Hispanic Americans.', 'contributors_enabled': False, 'followers_count': 55017, 'protected': False, 'expanded_url': u'http://www.nclr.org', 'location': u'Washington, DC', 'favourites_count': 1125, 'listed_count': 1753, 'id': 14226882, 'screen_name': u'NCLR'}
{'lang': u'en', 'statuses_count': 6647, 'name': u'Americans United', 'friends_count': 631, 'url': u'http://t.co/B2yPlAK9Lc', 'created_at': u'Thu Mar 27 13:23:30 +0000 2008', 'description': u"Americans United for Separation of Church and State is a national religious liberty watchdog group. We've been protecting the wall of separation since 1947!", 'contributors_enabled': False, 'followers_count': 11232, 'protected': False, 'expanded_url': u'http://www.au.org/donate/', 'location': u'Washington, D.C. ', 'favourites_count': 154, 'listed_count': 573, 'id': 14235041, 'screen_name': u'americansunited'}
{'lang': u'en', 'statuses_count': 25165, 'name': u'Free Press', 'friends_count': 22693, 'url': u'http://t.co/GUKQP3fY5g', 'created_at': u'Thu Apr 03 11:43:22 +0000 2008', 'description': u'We\u2019re a nonpartisan organization fighting for your rights to connect and communicate.', 'contributors_enabled': False, 'followers_count': 47819, 'protected': False, 'expanded_url': u'http://www.freepress.net', 'location': u'Northampton, MA and Washington', 'favourites_count': 1025, 'listed_count': 2664, 'id': 14292458, 'screen_name': u'freepress'}
{'lang': u'en', 'statuses_count': 14131, 'name': u'Public Citizen', 'friends_count': 4347, 'url': u'http://t.co/GlJHWn6DtV', 'created_at': u'Tue Apr 08 20:07:57 +0000 2008', 'description': u'Public Citizen is a national, nonprofit advocacy org that has been standing up to corporate power and holding government accountable for 45 yrs.', 'contributors_enabled': False, 'followers_count': 36649, 'protected': False, 'expanded_url': u'http://www.citizen.org', 'location': u'Washington, DC', 'favourites_count': 740, 'listed_count': 1757, 'id': 14335586, 'screen_name': u'Public_Citizen'}
{'lang': u'en', 'statuses_count': 49790, 'name': u'FreedomWorks', 'friends_count': 21931, 'url': u'http://t.co/TXVi1kh6Nt', 'created_at': u'Sun May 11 04:29:14 +0000 2008', 'description': u'Lower Taxes, Less Government, More Freedom! Educating and equipping a grassroots army for liberty. #grassrootsmatter\n#tcot #tlot', 'contributors_enabled': False, 'followers_count': 256069, 'protected': False, 'expanded_url': u'http://www.FreedomWorks.org', 'location': u'Washington, DC', 'favourites_count': 472, 'listed_count': 4265, 'id': 14730894, 'screen_name': u'FreedomWorks'}
{'lang': u'en', 'statuses_count': 8229, 'name': u'Natl Taxpayers Union', 'friends_count': 3500, 'url': u'http://t.co/VN03k9KLeq', 'created_at': u'Fri Jun 06 13:03:31 +0000 2008', 'description': u"Official Twitter account of National Taxpayers Union: The Voice of America's Taxpayers.", 'contributors_enabled': False, 'followers_count': 9704, 'protected': False, 'expanded_url': u'http://www.ntu.org', 'location': u'Washington, DC', 'favourites_count': 61, 'listed_count': 369, 'id': 15029174, 'screen_name': u'NTU'}
{'lang': u'en', 'statuses_count': 8664, 'name': u'ACLU of Northern CA', 'friends_count': 2385, 'url': u'https://t.co/SHTN3GdFrV', 'created_at': u'Thu Jul 17 22:25:59 +0000 2008', 'description': u'The largest @ACLU affiliate in the nation. We like justice, freedom, equality, and short walks on the beach. Followed by more justice.', 'contributors_enabled': False, 'followers_count': 11788, 'protected': False, 'expanded_url': u'https://www.aclunc.org', 'location': u'San Francisco, CA', 'favourites_count': 4083, 'listed_count': 667, 'id': 15474846, 'screen_name': u'ACLU_NorCal'}
{'lang': u'en', 'statuses_count': 15289, 'name': u'The CCR', 'friends_count': 2716, 'url': u'http://t.co/Kqu5kxHrTV', 'created_at': u'Mon Jul 28 17:23:38 +0000 2008', 'description': u'The Center for Constitutional Rights is dedicated to advancing and protecting the rights guaranteed by the U.S. Constitution and the UDHR.', 'contributors_enabled': False, 'followers_count': 16724, 'protected': False, 'expanded_url': u'http://www.ccrjustice.org', 'location': u'New York, NY', 'favourites_count': 1568, 'listed_count': 987, 'id': 15634728, 'screen_name': u'theCCR'}
{'lang': u'en', 'statuses_count': 8727, 'name': u'JWI', 'friends_count': 1325, 'url': u'http://t.co/e6zRAd5d6K', 'created_at': u'Fri Aug 01 14:06:17 +0000 2008', 'description': u'Working to protect fundamental rights of women and girls; building safe homes, healthy relationships, strong women; publishers of https://t.co/R91242V2WB.', 'contributors_enabled': False, 'followers_count': 4952, 'protected': False, 'expanded_url': u'http://www.jwi.org', 'location': u'Washington, DC', 'favourites_count': 1504, 'listed_count': 206, 'id': 15689319, 'screen_name': u'JewishWomenIntl'}
{'lang': u'en', 'statuses_count': 3028, 'name': u'InstituteForJustice', 'friends_count': 12147, 'url': u'http://t.co/Neu1ppC9f3', 'created_at': u'Fri Aug 08 20:10:53 +0000 2008', 'description': u'The Institute for Justice is the national law firm for liberty.  Follow IJ: protect property, economic liberty, free speech and school choice!', 'contributors_enabled': False, 'followers_count': 31031, 'protected': False, 'expanded_url': u'http://www.ij.org', 'location': u'Arlington, Virginia', 'favourites_count': 162, 'listed_count': 1321, 'id': 15782399, 'screen_name': u'IJ'}
{'lang': u'en', 'statuses_count': 13214, 'name': u'PVA1946', 'friends_count': 12433, 'url': u'http://t.co/iW3P7oPayw', 'created_at': u'Fri Aug 22 16:27:27 +0000 2008', 'description': u'Paralyzed Veterans of America', 'contributors_enabled': False, 'followers_count': 17920, 'protected': False, 'expanded_url': u'http://www.pva.org', 'location': u'Washington, D.C.', 'favourites_count': 21947, 'listed_count': 581, 'id': 15946841, 'screen_name': u'PVA1946'}
{'lang': u'en', 'statuses_count': 11759, 'name': u'POGOBlog', 'friends_count': 5635, 'url': u'http://t.co/6pYUYfi5QE', 'created_at': u'Wed Sep 03 17:51:22 +0000 2008', 'description': u'Exposing corruption, exploring solutions. SecureDrop - https://t.co/zd6Mk3IWni\nAlso at: @StrausReform & @AskMoreKnowMore.', 'contributors_enabled': False, 'followers_count': 9554, 'protected': False, 'expanded_url': u'http://www.pogo.org', 'location': u'Washington, DC', 'favourites_count': 2867, 'listed_count': 566, 'id': 16116519, 'screen_name': u'POGOBlog'}
{'lang': u'en', 'statuses_count': 10202, 'name': u'YAF', 'friends_count': 1558, 'url': u'https://t.co/A3I9nSBnlM', 'created_at': u'Fri Sep 05 20:04:51 +0000 2008', 'description': u"Young America's Foundation introduces students to conservatism through our campus programs and conferences. The Foundation also preserves the Reagan Ranch.", 'contributors_enabled': False, 'followers_count': 17232, 'protected': False, 'expanded_url': u'http://www.yaf.org', 'location': u'Reston, VA', 'favourites_count': 2491, 'listed_count': 419, 'id': 16148677, 'screen_name': u'yaf'}
{'lang': u'en', 'statuses_count': 3079, 'name': u'RCRC', 'friends_count': 1062, 'url': u'https://t.co/4HIPUnBS0c', 'created_at': u'Wed Sep 10 16:49:22 +0000 2008', 'description': u'Religious Coalition for Reproductive Choice (RCRC): Pro-Family, Pro-Faith, Pro-Choice! #ItsTime', 'contributors_enabled': False, 'followers_count': 3887, 'protected': False, 'expanded_url': u'http://www.rcrc.org', 'location': u'Washington, DC', 'favourites_count': 237, 'listed_count': 177, 'id': 16223542, 'screen_name': u'RCRChoice'}
{'lang': u'en', 'statuses_count': 2154, 'name': u'CBLPI', 'friends_count': 1078, 'url': u'http://t.co/EdlFzudGvI', 'created_at': u'Tue Sep 16 18:48:29 +0000 2008', 'description': u'Preparing and promoting conservative women leaders.', 'contributors_enabled': False, 'followers_count': 2369, 'protected': False, 'expanded_url': u'http://www.cblpi.org', 'location': u'Herndon, VA', 'favourites_count': 229, 'listed_count': 92, 'id': 16315120, 'screen_name': u'CBLPI'}
{'lang': u'en', 'statuses_count': 6419, 'name': u'Victory Fund', 'friends_count': 2220, 'url': u'http://t.co/KowEiVTxEE', 'created_at': u'Thu Oct 02 20:27:57 +0000 2008', 'description': u'For 25 years, the Gay & Lesbian Victory Fund and Institute has worked to recruit, train and elect LGBT public officials.', 'contributors_enabled': False, 'followers_count': 19060, 'protected': False, 'expanded_url': u'http://victoryfund.org', 'location': u'Washington, DC', 'favourites_count': 811, 'listed_count': 712, 'id': 16566133, 'screen_name': u'VictoryFund'}
{'lang': u'en', 'statuses_count': 14671, 'name': u'Tax Foundation', 'friends_count': 326, 'url': u'http://t.co/yMRykSBZXh', 'created_at': u'Fri Oct 10 18:13:37 +0000 2008', 'description': u'Sound Tax Policy since 1937.', 'contributors_enabled': False, 'followers_count': 19632, 'protected': False, 'expanded_url': u'http://www.taxfoundation.org', 'location': u'Washington, DC', 'favourites_count': 1334, 'listed_count': 781, 'id': 16686673, 'screen_name': u'taxfoundation'}
{'lang': u'en', 'statuses_count': 11985, 'name': u'NatImmForum', 'friends_count': 1882, 'url': u'http://t.co/dP5lbIKrpK', 'created_at': u'Thu Oct 16 03:33:35 +0000 2008', 'description': u'The National Immigration Forum advocates for the value of immigrants and immigration to the nation.', 'contributors_enabled': False, 'followers_count': 12927, 'protected': False, 'expanded_url': u'http://www.immigrationforum.org', 'location': u'Washington, DC', 'favourites_count': 529, 'listed_count': 491, 'id': 16801671, 'screen_name': u'NatImmForum'}
{'lang': u'en', 'statuses_count': 301, 'name': u'Bet Tzedek', 'friends_count': 212, 'url': u'http://t.co/4QPXrzY8fU', 'created_at': u'Fri May 22 21:03:59 +0000 2009', 'description': u'Dedicated to justice for all.', 'contributors_enabled': False, 'followers_count': 792, 'protected': False, 'expanded_url': u'http://bettzedek.org', 'location': u'Los Angeles, CA', 'favourites_count': 38, 'listed_count': 38, 'id': 41900627, 'screen_name': u'BetTzedek'}
{'lang': u'en', 'statuses_count': 6320, 'name': u'American Family Assc', 'friends_count': 307, 'url': u'https://t.co/vwiNqZOzmz', 'created_at': u'Tue May 26 15:23:05 +0000 2009', 'description': u'Since 1977 American Family Association has existed to inform & equip individuals to strengthen the moral foundations of American culture. Radio network: @AFRnet', 'contributors_enabled': False, 'followers_count': 10733, 'protected': False, 'expanded_url': u'http://www.afa.net', 'location': u'Tupelo, Mississippi', 'favourites_count': 1754, 'listed_count': 354, 'id': 42645839, 'screen_name': u'AmericanFamAssc'}
{'lang': u'en', 'statuses_count': 8432, 'name': u'Semper Fi Fund', 'friends_count': 13810, 'url': u'https://t.co/G0yzZnkSHN', 'created_at': u'Tue May 26 21:49:09 +0000 2009', 'description': u'Immediate assistance and lifetime support for wounded, critically ill and injured service members, veterans & their families.', 'contributors_enabled': False, 'followers_count': 19976, 'protected': False, 'expanded_url': u'http://www.semperfifund.org', 'location': u'Facebook.com/SemperFiFund', 'favourites_count': 10638, 'listed_count': 383, 'id': 42731742, 'screen_name': u'SemperFiFund'}
{'lang': u'en', 'statuses_count': 630, 'name': u'Patrick Henry Center', 'friends_count': 251, 'url': u'http://t.co/OZEDbXQaRg', 'created_at': u'Tue Jun 02 17:02:20 +0000 2009', 'description': u'The Patrick Henry Center serves as the only national organization devoted to promoting political courage and love of liberty as exemplified by Patrick Henry.', 'contributors_enabled': False, 'followers_count': 296, 'protected': False, 'expanded_url': u'http://www.patrickhenrycenter.com', 'location': u'Manassas, VA', 'favourites_count': 0, 'listed_count': 18, 'id': 44157002, 'screen_name': u'PatrickHenryCtr'}
{'lang': u'en', 'statuses_count': 16066, 'name': u'NAACP', 'friends_count': 11731, 'url': u'http://t.co/ZN6Tm0vIOF', 'created_at': u'Fri Jun 05 20:32:34 +0000 2009', 'description': u"Founded Feb. 12, 1909, the NAACP is the nation's oldest and largest grassroots\u2013based civil rights organization. Over 2,000 volunteer-run branches nationwide.", 'contributors_enabled': False, 'followers_count': 141893, 'protected': False, 'expanded_url': u'http://www.naacp.org', 'location': u'Baltimore, MD', 'favourites_count': 962, 'listed_count': 2503, 'id': 44988185, 'screen_name': u'NAACP'}
{'lang': u'en', 'statuses_count': 12001, 'name': u'NatlMilitaryFamAssoc', 'friends_count': 2359, 'url': u'http://t.co/cbJJKf8Uwo', 'created_at': u'Wed Jun 17 19:04:09 +0000 2009', 'description': u'The National Military Family Association tweets! Find us on Facebook at http://t.co/XZV9Dgjz or read our blog at http://t.co/e49g9RRM.', 'contributors_enabled': False, 'followers_count': 18857, 'protected': False, 'expanded_url': u'http://www.militaryfamily.org', 'location': u'Alexandria, VA', 'favourites_count': 2280, 'listed_count': 419, 'id': 48073289, 'screen_name': u'military_family'}
{'lang': u'en', 'statuses_count': 6597, 'name': u'NCADV', 'friends_count': 1657, 'url': u'http://t.co/Xvz8myvjBw', 'created_at': u'Fri Jun 19 21:03:11 +0000 2009', 'description': u'The NCADV is the voice of victims and survivors. We are the catalyst for changing society to have zero tolerance for domestic violence.', 'contributors_enabled': False, 'followers_count': 12962, 'protected': False, 'expanded_url': u'http://www.ncadv.org', 'location': u'Denver, CO & Washington DC', 'favourites_count': 1112, 'listed_count': 393, 'id': 48827616, 'screen_name': u'NCADV'}
{'lang': u'en', 'statuses_count': 8058, 'name': u'Gov Acct Proj', 'friends_count': 641, 'url': u'http://t.co/wugzpeA6Su', 'created_at': u'Mon Jun 22 17:44:23 +0000 2009', 'description': u'Government Accountability Project (GAP) is a nonprofit public interest group that promotes government and corporate accountability by defending whistleblowers.', 'contributors_enabled': False, 'followers_count': 5973, 'protected': False, 'expanded_url': u'http://whistleblower.org/', 'location': u'Washington, D.C.', 'favourites_count': 47, 'listed_count': 422, 'id': 49702654, 'screen_name': u'GovAcctProj'}
{'lang': u'en', 'statuses_count': 13504, 'name': u'CenterforReproRights', 'friends_count': 1511, 'url': u'http://t.co/1HJ54Fa8w9', 'created_at': u'Wed Jun 24 13:53:54 +0000 2009', 'description': u'For more than 20 years, the Center for Reproductive Rights has used the law to advance reproductive freedom as a fundamental human right.', 'contributors_enabled': False, 'followers_count': 41606, 'protected': False, 'expanded_url': u'http://reproductiverights.org/', 'location': u'New York', 'favourites_count': 8237, 'listed_count': 1229, 'id': 50310311, 'screen_name': u'ReproRights'}
{'lang': u'en', 'statuses_count': 428, 'name': u'Mt. States Legal Fnd', 'friends_count': 88, 'url': u'http://t.co/EnYV81j1kf', 'created_at': u'Wed Jun 24 16:52:53 +0000 2009', 'description': u'A nonprofit, public-interest legal fnd that fights for individual liberty, the right to own and use property, limited and ethical govt. & free enterprise.', 'contributors_enabled': False, 'followers_count': 139, 'protected': False, 'expanded_url': u'http://www.mountainstateslegal.org/', 'location': u'Denver, CO', 'favourites_count': 0, 'listed_count': 3, 'id': 50361094, 'screen_name': u'MSLF'}