In [110]:
import pandas as pd
In [60]:
data = pd.read_csv('data/articles.csv')
In [10]:
data.head()
Out[10]:
prebody
body
postbody
0
\n\n\n New Straits Ti...
\n\n\nKUALA LUMPUR: MANUFACTURERS and sellers ...
LOAD-DATE: November 1, 2015\n\nLANGUAGE: ENGLI...
1
\n\n\n Spokesman Revie...
\n\n\nAt Smokin' Legal Vaperz, Alex Overman ca...
LOAD-DATE: December 31, 2015\n\nLANGUAGE: ENGL...
2
\n\n\n Spokesman Revie...
\n\n\nEDITORIAL\n\nVaping, like smoking, shoul...
LOAD-DATE: January 1, 2016\n\nLANGUAGE: ENGLIS...
3
\n\n\n The York Dispatc...
\n\n\nReady to quit? Find resources below\n\nC...
LOAD-DATE: April 14, 2015\n\nLANGUAGE: ENGLISH...
4
\n\n\n The Calgary He...
\n\n\nCalgary city council's proposed ban on v...
LOAD-DATE: June 27, 2015\n\nLANGUAGE: ENGLISH\...
In [112]:
# the body text can be accessed like this:
data.body
Out[112]:
0 \n\n\nKUALA LUMPUR: MANUFACTURERS and sellers ...
1 \n\n\nAt Smokin' Legal Vaperz, Alex Overman ca...
2 \n\n\nEDITORIAL\n\nVaping, like smoking, shoul...
3 \n\n\nReady to quit? Find resources below\n\nC...
4 \n\n\nCalgary city council's proposed ban on v...
5 \n\n\nKUALA LUMPUR: THE Health Ministry will n...
6 \n\n\nHIDDEN RISKS: There has been a long deba...
7 \n\n\nBANGOR, Maine -- In its latest effort to...
8 \n\n\nBANGOR, Maine -- In its latest effort to...
9 \n\n\nTHERE are restaurants in Kuala Lumpur of...
10 \n\n\nAS things are on hyper mode in Kuala Lum...
11 \n\n\nQuitting smoking cigarettes was easy for...
12 \n\n\nThe government has not decided on whethe...
13 \n\n\nThe Welsh government's proposal to ban t...
14 \n\n\nSept. 06--SHAMOKIN -- A new business tha...
15 \n\n\nKUANTAN: THE Pahang Islamic Religious an...
16 \n\n\nVaping advocates found relief at city ha...
17 \n\n\nKyra Donaldson, a 20-year-old from Mount...
18 \n\n\nKyra Donaldson, a 20-year-old from Mount...
19 \n\n\nApril 22--SCHUYLKILL HAVEN -- The boroug...
20 \n\n\nTHE issue on "vaping" or smoking e-cigar...
21 \n\nDATELINE: Camarillo\n\n\nCSU Channel Islan...
22 \n\n\nTina Brink hopes the province's new e-ci...
23 \n\n\nTina Brink hopes the province's new e-ci...
24 \n\n\nALOR STAR: There are no plans to ban vap...
25 \n\n\nMonica Or, columnist and Hospitality Con...
26 \n\n\nYou know a word has gone mainstream when...
27 \n\nDATELINE: Camarillo\n\n\nCSU Channel Islan...
28 \n\n\n\nHIGHLIGHT: E-liquid has a strong footh...
29 \n\n\nTHE Welsh government's proposal to ban t...
...
434 \n\n\ncthomas@cnpapers.com 304-348-1232\n\nThe...
435 \n\n\nIT is extraordinary that the 'vaping' re...
436 \n\n\nNo ifs, ands or butts about it.\n\nThe O...
437 \n\nDATELINE: Salem\n\n\nMultnomah County of O...
438 \n\n\nCHAMBERSBURG >> Borough council vo...
439 \n\n\nKUALA LUMPUR: LOCAL authorities here wan...
440 \n\n\nEVERY year the Oxford Dictionary singles...
441 \n\n\nLet's get this out of the way: Vaping is...
442 \n\n\nRESEARCH has found that more women than ...
443 \n\n\nOn Jan. 11, 1964, the Surgeon General's ...
444 \n\nDATELINE: Salem\n\n\nMultnomah County of O...
445 \n\n\nThe first TV advert in nearly half a cen...
446 \n\n\nTrain operator Southern is to ban the us...
447 \n\n\nTrain operator Southern is to ban the us...
448 \n\n\nTrain operator Southern is to ban the us...
449 \n\n\nTrain operator Southern is to ban the us...
450 \n\n\nTrain operator Southern is to ban the us...
451 \n\n\nTrain operator Southern is to ban the us...
452 \n\n\nTrain operator Southern is to ban the us...
453 \n\n\nTrain operator Southern is to ban the us...
454 \n\n\nThe Ontario government was on the right ...
455 \n\n\nResponding to riders complaints, BART pl...
456 \n\n\nResponding to riders complaints, BART pl...
457 \n\n\nE-cigarettes are unattractive to non-smo...
458 \n\n\nE-cigarettes are unattractive to non-smo...
459 \n\n\nWhen dedicated users of ecigarettes gath...
460 \n\n\nI SPOTTED this sign on the front door of...
461 \n\n\nIt was trivia night, and Michael Jamnick...
462 \n\n\nResponding to riders complaints, BART pl...
463 \n\n\nResponding to riders complaints, BART pl...
Name: body, dtype: object
In [11]:
from sklearn.feature_extraction.text import CountVectorizer
In [90]:
cnt_vect = CountVectorizer(min_df=20, stop_words='english')
cnt_vect
Out[90]:
CountVectorizer(analyzer='word', binary=False, decode_error='strict',
dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
lowercase=True, max_df=1.0, max_features=None, min_df=20,
ngram_range=(1, 1), preprocessor=None, stop_words='english',
strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
tokenizer=None, vocabulary=None)
In [128]:
simple_vect = CountVectorizer(token_pattern='\\b\\w+\\b')
text1 = 'This is SOME text that I am going to turn into a document term matrix!'
text2 = 'Another sentence here that we want to add'
In [133]:
toy_matrix = simple_vect.fit_transform([text1, text2])
simple_vect.get_feature_names()
pd.DataFrame(toy_matrix.todense(), columns=simple_vect.get_feature_names(), index=['text1', 'text2']).T
Out[133]:
text1
text2
a
1
0
add
0
1
am
1
0
another
0
1
document
1
0
going
1
0
here
0
1
i
1
0
into
1
0
is
1
0
matrix
1
0
sentence
0
1
some
1
0
term
1
0
text
1
0
that
1
1
this
1
0
to
1
1
turn
1
0
want
0
1
we
0
1
In [92]:
matrix
Out[92]:
<464x1117 sparse matrix of type '<class 'numpy.int64'>'
with 57455 stored elements in Compressed Sparse Row format>
In [ ]:
In [76]:
cnt_vect.get_feature_names()
Out[76]:
['000',
'10',
'100',
'10th',
'11',
'12',
'13',
'14',
'15',
'150',
'16',
'17',
'18',
'19',
'20',
'200',
'2003',
'2006',
'2007',
'2008',
'2009',
'2010',
'2011',
'2012',
'2013',
'2014',
'2015',
'2016',
'2017',
'21',
'22',
'222',
'23',
'24',
'25',
'250',
'26',
'28',
'29',
'30',
'300',
'31',
'35',
'360',
'40',
'400',
'450',
'50',
'500',
'530',
'60',
'65',
'70',
'700',
'80',
'800',
'8th',
'90',
'95',
'99',
'___',
'abdul',
'ability',
'able',
'absolutely',
'abuse',
'acceptable',
'accepted',
'access',
'accessories',
'according',
'account',
'accounted',
'act',
'action',
'active',
'activities',
'activity',
'actual',
'actually',
'adam',
'add',
'added',
'addict',
'addicted',
'addiction',
'addictive',
'addicts',
'adding',
'addition',
'additional',
'additionally',
'additive',
'additives',
'address',
'administration',
'adolescents',
'adopt',
'adopted',
'ads',
'adult',
'adults',
'advantages',
'adverse',
'advertise',
'advertisements',
'advertising',
'advice',
'advised',
'advisory',
'advocacy',
'advocate',
'advocates',
'aerosol',
'affairs',
'affect',
'age',
'agencies',
'agency',
'agents',
'ago',
'agree',
'agreed',
'ahead',
'ahmad',
'aid',
'aids',
'aim',
'air',
'alarming',
'alcohol',
'alike',
'allow',
'allowed',
'allowing',
'allows',
'alongside',
'alternative',
'alternatives',
'altogether',
'amend',
'amended',
'amendment',
'america',
'american',
'americans',
'amounts',
'ample',
'analysis',
'angeles',
'announced',
'annual',
'answer',
'answers',
'anti',
'anymore',
'apparently',
'appeal',
'appealing',
'appear',
'appeared',
'appears',
'apple',
'applies',
'apply',
'approach',
'approval',
'approved',
'april',
'ardent',
'area',
'areas',
'aren',
'argue',
'argued',
'arguing',
'argument',
'arguments',
'array',
'article',
'ash',
'asia',
'ask',
'asked',
'asking',
'aspect',
'aspects',
'associate',
'associated',
'association',
'assume',
'asthma',
'attempts',
'attention',
'attract',
'attractive',
'august',
'authorities',
'authority',
'availability',
'available',
'avenue',
'average',
'avoid',
'aware',
'awareness',
'away',
'bad',
'balance',
'ban',
'banana',
'bangor',
'banned',
'banning',
'bans',
'bar',
'bars',
'base',
'based',
'basic',
'basis',
'batteries',
'battery',
'battle',
'bay',
'beach',
'bear',
'began',
'begin',
'beginning',
'behavior',
'behaviour',
'believe',
'believed',
'believes',
'benefit',
'benefits',
'best',
'better',
'big',
'bigger',
'biggest',
'billion',
'bills',
'bit',
'black',
'blood',
'blow',
'blowing',
'blu',
'blue',
'blueberry',
'board',
'body',
'booming',
'boston',
'bottle',
'bottles',
'bought',
'brain',
'brand',
'brands',
'break',
'breath',
'breathe',
'breathing',
'brief',
'bring',
'bringing',
'brings',
'britain',
'british',
'brought',
'build',
'building',
'buildings',
'built',
'bunch',
'burn',
'burning',
'bus',
'business',
'businesses',
'buy',
'buying',
'byproducts',
'bystanders',
'cabinet',
'cafe',
'california',
'called',
'calling',
'calls',
'came',
'campaign',
'campaigns',
'canada',
'canadian',
'cancer',
'candy',
'car',
'carbon',
'carcinogen',
'carcinogenic',
'carcinogens',
'care',
'careful',
'carries',
'carry',
'carrying',
'cars',
'cartridge',
'cartridges',
'case',
'cases',
'cash',
'category',
'caught',
'cause',
'caused',
'causes',
'causing',
'caution',
'cautious',
'cdc',
'cent',
'center',
'centers',
'centre',
'century',
'ceo',
'certain',
'certainly',
'cessation',
'chain',
'chair',
'chairman',
'challenge',
'chance',
'change',
'changed',
'changes',
'changing',
'channel',
'charge',
'cheap',
'cheaper',
'check',
'chemical',
'chemicals',
'chief',
'child',
'children',
'china',
'chocolate',
'choice',
'choices',
'choose',
'chosen',
'chris',
'chronic',
'cig',
'cigar',
'cigarette',
'cigarettes',
'cigars',
'cigs',
'cited',
'cities',
'citing',
'citizens',
'city',
'claim',
'claimed',
'claims',
'clark',
'class',
'clean',
'clear',
'clearly',
'clinical',
'close',
'closed',
'cloud',
'clouds',
'club',
'coffee',
'coils',
'coincides',
'cold',
'colleagues',
'college',
'com',
'combination',
'combustible',
'come',
'comes',
'coming',
'comment',
'commentators',
'comments',
'commercial',
'commission',
'committed',
'committee',
'common',
'commonly',
'communities',
'community',
'companies',
'company',
'compared',
'competition',
'complaints',
'complete',
'completely',
'complex',
'compliance',
'comply',
'components',
'compounds',
'comprehensive',
'concentrated',
'concentrations',
'concept',
'concern',
'concerned',
'concerning',
'concerns',
'concluded',
'conditions',
'conducted',
'conference',
'confidence',
'confusion',
'conley',
'cons',
'consequences',
'consider',
'consideration',
'considered',
'considering',
'consumer',
'consumers',
'consuming',
'consumption',
'contact',
'contain',
'contained',
'containing',
'contains',
'content',
'contents',
'continue',
'continued',
'continues',
'contrary',
'control',
'controls',
'controversial',
'convenience',
'convention',
'conventional',
'convinced',
'cool',
'copyright',
'corner',
'cosmetics',
'cost',
'costs',
'cotton',
'cough',
'couldn',
'council',
'councilman',
'counter',
'counties',
'countries',
'country',
'county',
'couple',
'course',
'court',
'cover',
'covered',
'craving',
'craze',
'cream',
'create',
'created',
'creates',
'creating',
'crime',
'critics',
'cross',
'cultural',
'culture',
'curb',
'current',
'currently',
'customer',
'customers',
'customize',
'cut',
'daily',
'damage',
'danger',
'dangerous',
'dangers',
'data',
'date',
'dateline',
'datuk',
'david',
'day',
'days',
'deadly',
'deal',
'death',
'deaths',
'debate',
'debated',
'dec',
'decade',
'decades',
'december',
'decide',
'decided',
'decision',
'decisions',
'declared',
'decline',
'declined',
'declining',
'decrease',
'dedicated',
'defined',
'definitely',
'definition',
'deliver',
'delivering',
'delivers',
'delivery',
'democratic',
'demonstrate',
'department',
'dependence',
'depending',
'deputy',
'described',
'design',
'designated',
'designed',
'desire',
'despite',
'detected',
'determine',
'determined',
'develop',
'developed',
'developing',
'development',
'device',
'devices',
'did',
'didn',
'die',
'died',
'difference',
'different',
'difficult',
'direct',
'direction',
'directive',
'directly',
'director',
'discomfort',
'discourage',
'discovered',
'discuss',
'discussed',
'discussion',
'disease',
'diseases',
'display',
'distinction',
'distributed',
'district',
'doctor',
'doctors',
'does',
'doesn',
'doing',
'dollar',
'dollars',
'domestic',
'don',
'door',
'dose',
'double',
'doubled',
'doubt',
'downtown',
'dozen',
'dozens',
'dr',
'draft',
'drag',
'dramatic',
'drawing',
'drawn',
'drink',
'drinking',
'drive',
'driven',
'driving',
'drop',
'dropped',
'drug',
'drugs',
'duncan',
'duty',
'eager',
'earlier',
'early',
'easier',
'easily',
'east',
'easy',
'eating',
'ecigarettes',
'editor',
'editorial',
'educate',
'education',
'educational',
'effect',
'effective',
'effects',
'efficacy',
'efficient',
'effort',
'efforts',
'electronic',
'element',
'elements',
'email',
'emerged',
'emerging',
'emissions',
'emit',
'emitted',
'emotion',
'employee',
'employees',
'employers',
'employment',
'enacted',
'enclosed',
'encourage',
'encouraged',
'encouraging',
'end',
'ending',
'energy',
'enforce',
'enforcement',
'england',
'enjoy',
'enjoying',
'ensure',
'enter',
'enterprise',
'enthusiasts',
'enticing',
'entirely',
'entrances',
'entrepreneurs',
'environment',
'environmental',
'equipment',
'equivalent',
'err',
'escalated',
'especially',
'essentially',
'established',
'establishments',
'estimate',
'estimated',
'estimates',
'eu',
'europe',
'european',
'events',
'eventually',
'everybody',
'evidence',
'ex',
'exactly',
'example',
'excessive',
'excise',
'excited',
'executive',
'exhale',
'exhaled',
'exhaling',
'exist',
'existing',
'exists',
'expand',
'expanding',
'expect',
'expected',
'expensive',
'experience',
'experienced',
'experimentation',
'expert',
'experts',
'explained',
'explains',
'exposed',
'exposure',
'express',
'expressed',
'extended',
'extending',
'extra',
'extremely',
'eye',
'face',
'facebook',
'facilities',
'fact',
'factors',
'facts',
'failed',
'failure',
'fair',
'fall',
'falls',
'false',
'familiar',
'family',
'fans',
'far',
'fast',
'fatwa',
'favor',
'favorite',
'fda',
'fear',
'fears',
'features',
'feb',
'february',
'federal',
'feel',
'feeling',
'feels',
'feet',
'fellow',
'felt',
'fernando',
'fewer',
'fight',
'fighting',
'figure',
'figures',
'filled',
'final',
'finally',
'financial',
'finding',
'findings',
'finds',
'fine',
'fines',
'firm',
'firms',
'fit',
'fix',
'flavor',
'flavored',
'flavoring',
'flavorings',
'flavors',
'flavour',
'flavoured',
'flavouring',
'flavourings',
'flavours',
'focus',
'folks',
'follow',
'following',
'food',
'foot',
'forbidden',
'force',
'foreign',
'forget',
'form',
'formaldehyde',
'forms',
'forward',
'foundation',
'founder',
'franchise',
'free',
'freedom',
'frequently',
'friday',
'friend',
'friends',
'fruit',
'fruity',
'fully',
'fun',
'function',
'funded',
'funding',
'future',
'gabriel',
'gadgets',
'game',
'gas',
'gateway',
'gather',
'gave',
'general',
'generally',
'generation',
'gets',
'getting',
'given',
'gives',
'giving',
'global',
'glycerin',
'glycerine',
'glycol',
'goal',
'goes',
'going',
'gone',
'good',
'goods',
'got',
'gotten',
'gov',
'government',
'governments',
'grade',
'graders',
'gradually',
'granted',
'grasped',
'great',
'greater',
'green',
'gregory',
'ground',
'grounds',
'group',
'groups',
'grow',
'growing',
'grown',
'growth',
'guidance',
'guide',
'guidelines',
'gum',
'habit',
'habits',
'hair',
'half',
'hall',
'hand',
'handed',
'hands',
'happen',
'happening',
'happy',
'haram',
'hard',
'harder',
'hardware',
'harm',
'harmful',
'harmless',
'harms',
'hasn',
'hate',
'haven',
'having',
'hazardous',
'head',
'health',
'healthcare',
'healthier',
'healthy',
'hear',
'heard',
'hearing',
'heart',
'heat',
'heated',
'heating',
'heats',
'heavy',
'held',
'help',
'helped',
'helping',
'helps',
'high',
'higher',
'highlight',
'highly',
'hip',
'history',
'hit',
'hobby',
'hold',
'holding',
'holds',
'home',
'hookah',
'hookahs',
'hooked',
'hope',
'hopes',
'hospital',
'hospitals',
'hours',
'house',
'housing',
'http',
'huge',
'human',
'hundreds',
'hurt',
'husband',
'hypotheses',
'idea',
'ill',
'illegal',
'illnesses',
'immediately',
'impact',
'implement',
'implemented',
'important',
'impose',
'imposed',
'impossible',
'improve',
'improved',
'improving',
'include',
'included',
'includes',
'including',
'inconclusive',
'increase',
'increased',
'increases',
'increasing',
'increasingly',
'independent',
'individual',
'individuals',
'indoor',
'indoors',
'industry',
'information',
'informed',
'ingredients',
'inhalant',
'inhalation',
'inhale',
'inhaled',
'inhalers',
'inhales',
'inhaling',
'initial',
'initiative',
'inside',
'insist',
'instead',
'institute',
'institutes',
'intake',
'intended',
'interested',
'international',
...]
In [83]:
import lda
In [84]:
model = lda.LDA(n_topics=5)
In [93]:
model.fit(matrix)
Out[93]:
<lda.lda.LDA at 0x10bbd3710>
In [48]:
model.doc_topic_
Out[48]:
array([[ 2.24971879e-04, 3.46681665e-01, 2.76940382e-01,
1.66704162e-01, 2.09448819e-01],
[ 1.74367916e-04, 1.62336530e-01, 9.60767219e-02,
3.00087184e-01, 4.41325196e-01],
[ 3.60765550e-01, 5.20255183e-01, 1.30781499e-02,
1.05582137e-01, 3.18979266e-04],
...,
[ 3.39665787e-01, 1.76077397e-01, 1.75901495e-04,
6.17414248e-02, 4.22339490e-01],
[ 5.30434783e-01, 4.14078675e-04, 4.14078675e-04,
2.81987578e-01, 1.86749482e-01],
[ 3.93788820e-01, 4.14078675e-04, 4.14078675e-04,
4.02070393e-01, 2.03312629e-01]])
In [55]:
model.topic_word_.shape
Out[55]:
(5, 247)
In [105]:
matrix2 = cnt_vect.transform(data.body[:10])
matrix2
Out[105]:
<10x1117 sparse matrix of type '<class 'numpy.int64'>'
with 1815 stored elements in Compressed Sparse Row format>
In [106]:
model.transform(matrix2)
WARNING:lda:all zero column in document-term matrix found
Out[106]:
array([[ 0.01959271, 0.04362574, 0.46087429, 0.39479868, 0.08110858],
[ 0.05151475, 0.2908398 , 0.01769963, 0.27227386, 0.36767197],
[ 0.09534772, 0.18483585, 0.11838185, 0.38861033, 0.21282426],
[ 0.07181134, 0.38003069, 0.03227905, 0.46655596, 0.04932296],
[ 0.00313397, 0.07231488, 0.01601725, 0.55261716, 0.35591673],
[ 0.01179145, 0.00565075, 0.49954988, 0.38388034, 0.09912757],
[ 0.13278075, 0.07818933, 0.08257561, 0.70544624, 0.00100806],
[ 0.2162294 , 0.15946174, 0.0040799 , 0.36138612, 0.25884284],
[ 0.2162294 , 0.15946174, 0.0040799 , 0.36138612, 0.25884284],
[ 0.00074296, 0.17144157, 0.38328393, 0.40533007, 0.03920147]])
In [86]:
%matplotlib inline
import pyLDAvis
import numpy as np
In [245]:
doc_lengths = [len(t.split()) for t in data.body.values]
vocab = cnt_vect.get_feature_names()
term_frequency = matrix.sum(axis=0).tolist()
data_dict = {'topic_term_dists': model.topic_word_,
'doc_topic_dists': model.doc_topic_,
'doc_lengths': np.array(doc_lengths),
'vocab': np.array(vocab),
'term_frequency': term_frequency[0]}
In [95]:
vis_data = pyLDAvis.prepare(**data_dict)
In [96]:
pyLDAvis.display(vis_data)
Out[96]:
In [231]:
vis_data.topic_coordinates
Out[231]:
Freq
cluster
topics
x
y
topic
3
44.579511
1
1
0.251403
0.086521
1
21.382101
1
2
-0.013670
-0.166762
4
15.488031
1
3
-0.087000
0.221213
0
9.776710
1
4
0.109880
-0.112079
2
8.773647
1
5
-0.260614
-0.028892
In [183]:
import re
In [256]:
term_dict={}
for tnum,topic in enumerate(model.topic_word_):
terms = list(np.array(vocab)[np.argsort(topic)])[:-20:-1]
term_dict[tnum]=terms
pd.DataFrame(term_dict)
Out[256]:
0
1
2
3
4
0
cigarettes
said
vaping
cigarettes
said
1
said
vaping
said
smoking
cigarettes
2
health
vape
vape
vaping
public
3
percent
nicotine
ban
tobacco
vaping
4
vaping
vapor
health
cigarette
city
5
tobacco
cigarettes
products
nicotine
ban
6
nicotine
like
industry
smokers
smoking
7
products
liquid
government
health
electronic
8
cigarette
shop
state
people
health
9
use
store
ministry
smoke
devices
10
teens
people
new
use
use
11
smoking
customers
tobacco
public
county
12
association
products
dr
like
council
13
school
years
market
way
ordinance
14
american
juice
malaysia
research
places
15
california
quit
minister
quit
law
16
high
new
devices
electronic
board
17
public
devices
liquids
vapour
businesses
18
2014
year
sale
just
proposed
In [269]:
topic=2
topN=40
topic_terms = list(np.array(vocab)[np.argsort(model.topic_word_[topic-1])][:-topN:-1])
term_re = r'\b({})\b'.format('|'.join(topic_terms))
term_re
Out[269]:
'\\b(said|vaping|vape|nicotine|vapor|cigarettes|like|liquid|shop|store|people|customers|products|years|juice|quit|new|devices|year|time|day|business|shops|used|smoking|liquids|industry|want|flavors|stores|vapes|started|smoker|trying|know|called|sell|smoke|opened)\\b'
In [225]:
from IPython.display import Markdown
In [268]:
Markdown(re.sub(term_re, lambda x: '**' + x.group(1).upper() + '**',data.body.iloc[463]))
Out[268]:
Responding to riders complaints, BART plans to join the ranks of PUBLIC transit
agencies snuffing out e-CIGARETTE USE on its trains and stations.
The Bay Area Rapid Transit Board board has scheduled a Feb. 12 final vote on the
ban, after giving unanimous initial approval last week.
The American Lung Association called for the measure, saying it is important to
protect the HEALTH of riders from second-hand vapors and particle pollution from
electronic CIGARETTES.
"If someone pulls out one of this DEVICES on a crowded BART train, you're
stuck," SAID Serena Chen, regional advocacy director for the American Lung
Association in California. "It's not harmless water vapor. They are particles of
NICOTINE and other substances that are listed as harmful toxics."
E-CIGARETTES heat a LIQUID to produce vapors that can carry NICOTINE to the user
-- along with a variety of flavored substances. Sales of the DEVICES are booming
as an alternative to TOBACCO CIGARETTES.
And complaints about USE of the DEVICES on trains also are growing, SAID BART
Director Robert Raburn of Oakland.
While the district bans SMOKING, it has had no policy on electronic CIGARETTES.
Vaping devises would be subject to the same rules as CIGARETTES.
An e-CIGARETTE industry spokesman SAID Tuesday the measure overreaches because
it bans VAPING DEVICES everywhere on BART property -- not just on train cars.
"As a matter of etiquette, you can see a ban on trains," SAID Greg Conley,
president of the American Vaping Association. "But the proposal goes too far and
it's emblematic of the anti-CIGARETTE groups treating electronic CIGARETTES the
same."
The VAPING ASSOCIATION contends e-CIGARETTES are much safer than TOBACCO
CIGARETTES and help many smokers wean themselves off TOBACCO.
The LUNG ASSOCIATION, however, says many people who have never smoked get hooked
on NICOTINE through e-CIGARETTES
At a BART meeting Thursday night, five speakers spoke in favor of the ban.
Penalties would be $100 for first-time violators and $200 for second time
offenders.
Caltrans, AC Transit, Santa Clara Valley Transportation Authority and San
Francisco Muni PUBLIC transit systems already have banned electronic CIGARETTES,
says the American Lung Association.
But some other transit operators such as the San Diego Metropolitan Transit
System have not restricted e-CIGARETTES as they await STATE and federal
guidelines.
A bill introduced this week by State Sen. Mark Leno, D-San Francisco, would
classify e-CIGARETTES as TOBACCO PRODUCTS and bar them from PUBLIC transit
systems, the work place, schools, and in restaurants and bars.
Contact Denis Cuff at 925-943-8267. Follow him at Twitter.com/deniscuff .
In [221]:
pd.DataFrame(model.doc_topic_)
Out[221]:
0
1
2
3
4
0
0.022976
0.034275
0.490019
0.361959
0.090772
1
0.052163
0.279604
0.000247
0.254883
0.413103
2
0.000487
0.209732
0.175669
0.433577
0.180535
3
0.087205
0.348075
0.000248
0.502112
0.062360
4
0.000484
0.073123
0.000484
0.596126
0.329782
5
0.032994
0.000407
0.476986
0.346640
0.142974
6
0.120203
0.086391
0.062722
0.723753
0.006932
7
0.233188
0.174964
0.020670
0.326346
0.244833
8
0.218632
0.201164
0.000291
0.326346
0.253566
9
0.009583
0.195054
0.343431
0.411437
0.040495
10
0.000504
0.116373
0.297733
0.584887
0.000504
11
0.031224
0.486920
0.000281
0.410970
0.070605
12
0.051477
0.000844
0.642194
0.220253
0.085232
13
0.000362
0.000362
0.050995
0.745389
0.202893
14
0.000733
0.660073
0.000733
0.183883
0.154579
15
0.001170
0.036257
0.585965
0.118129
0.258480
16
0.000881
0.318062
0.000881
0.406167
0.274009
17
0.013060
0.517542
0.030986
0.328041
0.110371
18
0.074903
0.517632
0.000257
0.303990
0.103218
19
0.028921
0.566599
0.000407
0.314053
0.090020
20
0.000353
0.000353
0.398942
0.571781
0.028571
21
0.173537
0.153181
0.000509
0.590840
0.081934
22
0.000207
0.252534
0.252534
0.482110
0.012616
23
0.000207
0.217373
0.254602
0.513133
0.014685
24
0.000504
0.005542
0.640302
0.191940
0.161713
25
0.000687
0.337457
0.014433
0.578007
0.069416
26
0.000612
0.398165
0.055657
0.544954
0.000612
27
0.127735
0.132824
0.000509
0.667176
0.071756
28
0.202245
0.247147
0.131151
0.320112
0.099345
29
0.025231
0.000615
0.000615
0.720615
0.252923
...
...
...
...
...
...
434
0.000533
0.656533
0.000533
0.256533
0.085867
435
0.001575
0.096063
0.253543
0.521260
0.127559
436
0.128435
0.019808
0.320128
0.269010
0.262620
437
0.172063
0.076825
0.064127
0.057778
0.629206
438
0.002597
0.132468
0.158442
0.158442
0.548052
439
0.000697
0.000697
0.725436
0.119164
0.154007
440
0.001047
0.472251
0.001047
0.524607
0.001047
441
0.028412
0.496379
0.000557
0.401671
0.072981
442
0.001130
0.012429
0.091525
0.893785
0.001130
443
0.004536
0.049897
0.029278
0.771546
0.144742
444
0.159365
0.146667
0.057778
0.000635
0.635556
445
0.104323
0.052450
0.098559
0.565418
0.179251
446
0.001361
0.001361
0.001361
0.001361
0.994558
447
0.001361
0.001361
0.001361
0.001361
0.994558
448
0.028571
0.014966
0.001361
0.001361
0.953741
449
0.001361
0.001361
0.001361
0.001361
0.994558
450
0.001361
0.001361
0.001361
0.001361
0.994558
451
0.001361
0.001361
0.001361
0.001361
0.994558
452
0.001361
0.001361
0.001361
0.001361
0.994558
453
0.001361
0.001361
0.001361
0.001361
0.994558
454
0.000643
0.019936
0.309325
0.592283
0.077814
455
0.352836
0.000597
0.000597
0.000597
0.645373
456
0.376716
0.000597
0.000597
0.000597
0.621493
457
0.014936
0.000364
0.000364
0.798179
0.186157
458
0.000364
0.000364
0.000364
0.790893
0.208015
459
0.032314
0.250655
0.067249
0.562154
0.087627
460
0.002299
0.370115
0.117241
0.462069
0.048276
461
0.126104
0.195716
0.000268
0.543775
0.134137
462
0.431268
0.000590
0.000590
0.000590
0.566962
463
0.437168
0.000590
0.000590
0.000590
0.561062
464 rows × 5 columns
In [ ]:
Content source: mbod/intro_python_for_comm
Similar notebooks: