In [1]:
import pandas as pd
import numpy as np
import glob
import time
import os
import pymarc
import marcx
import codecs
import string
os.chdir('/Volumes/jwa_drive1/git/pardee')

In [2]:
df_contents = pd.read_csv('contents.csv',sep=',')
df_contents = df_contents.dropna()
df_sources = pd.read_csv('source.csv',sep=',')
df_surveys = pd.read_csv('survey.csv', sep=',',dtype={'naics':str})
df_crosswalk = pd.read_excel('SIC_NASIC_Crosswalk.xlsx')
df_crosswalk.rename(columns={'NAICS Industry\xa0':'NAICS Industry'}, inplace=True)
df_crosswalk['NAICS'] = df_crosswalk['NAICS'].astype(str)
df_crosswalk['SIC'] = df_crosswalk['SIC'].astype(str)
df_surveys['naics'] = df_surveys['naics'].astype(str)
df_surveys['sic'] = df_surveys['sic'].astype(str)
header = '<?xml version="1.0" encoding="UTF-8" ?><collection xmlns:marc="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">'
footer = '</collection>'
print(df_surveys.columns)
print(df_contents.columns)
print(df_sources.columns)
#df_sources.set_index('c_so_id')


Index(['sur_id', 'title', 'sic', 'keyword', 'naics'], dtype='object')
Index(['c_id', 'c_sur_id', 'c_so_id', 'c_url'], dtype='object')
Index(['so_id', 's_title', 'location', 'type', 'u_type', 's_url', 'g_url',
       'ezpx'],
      dtype='object')

In [3]:
def norm_str(s):
    exclude = set(string.punctuation)
    exclude.add(' ')
    if not type(s) == str:
        s = str(s)
    s = ''.join(ch for ch in s if ch not in exclude)
    #s=s.replace(' ','')
    return(s.lower())

def get_sic(s,df):
    #print(type(df.ntitle))
    #print(type(s.ntitle))
    mask = df.ntitle == s.ntitle
    d = df[mask]
    #print(type(d))
    return(d)

In [4]:
out = codecs.open('PardeeIS.xml','w', 'utf-8')
out.write(header)
counter = 0
no_856 = 0
proxy_list = ['galegroup.com/','lexisnexis.com/','www.mergentonline.com/','standardandpoors.com/','valueline.com/','proquest.com/','ibisworld.com/','ebscohost.com/']
for index,row in df_surveys.iterrows():
    ## default leader and 008 values
    leader_value = '#####naa#a22#####3##4500'
    _008_value = '######s2015####xxu####eote###000#0#eng#d'
    ## grab the values for sic, naics, keywords from the dataframe
    sic = row['sic']
    naics = row['naics']
    keywords = row['keyword']
    title = row['title']
    ## create empty lists for sics, urls
    sics = []
    urls = []
    ## if no value for sic, lookup in df_crosswalk dataframe
    if sic == 'nan':
        mask = df_crosswalk['NAICS'] == naics
        for index,r in df_crosswalk[mask].iterrows():
            sics.append(r['SIC'])
    else:
        sics.append(sic)
    try:
        sic = sics[0]
    except:
        sic = ''
    ## check for multiple values in the naics field. If so, split them into a list
    naics = naics.replace(' ','')
    if naics.find(',') > 0:
        naics = naics.split(',')
    else:
        naics = [naics]
    ##
    ## get the url
    ##
    mask = df_contents['c_sur_id'] == row['sur_id']
    for index,r in df_contents[mask].iterrows():
        smask = df_sources['so_id'] == r['c_so_id']
        for s_i,s_r in df_sources[smask].iterrows():
            s_title = s_r['s_title']
            s_location = s_r['location']
        urls.append([r['c_url'],s_title,s_location])
    ## create a list of naics
    mask = df_crosswalk['SIC'] == row['sic']
    naics_rows = df_crosswalk[mask]
    ## create a marc record and add fields
    record = marcx.FatRecord()
    record.leader = leader_value
    record.add('008',data = _008_value)
    for n in sics:
        if n != 'nan':
            record.add('024', a=str(n),_2='SICS',indicators=['7',' '])
    for n in naics:
        if n != 'nan':
            record.add('024', a=str(n),_2='NAICS',indicators=['7',' '])
    record.add('041',a='eng')
    record.add('245',a = str(title),indicators=['0','0'])
    ## lookup the NAICS Industry description and add it to a note field
    for index,row in df_crosswalk[mask].iterrows():
        try:
            record.add('500',a = row['NAICS Industry'])
        except:
            pass
    ## add the keywords to a 650 field. Is this the best field?
    record.add('650',a = str(keywords),indicators=['0','4'])
    ## iterate through the urls and add to an 856
    ## need to add a subfield z and subfield 3 with the title and source from the df_sources dataframe
    ##
    record.add('902',a='PardeeIndustrySurvey')
    for n in urls:
        record.remove('856')
        url = str(n[0])
        s_tit = str(n[1])
        s_loc = str(n[2])
        s_link_text = ''
        if s_tit == 'Encyclopedia of American Industries' and len(sic) > 0:
            s_link_text = 'Search SIC [' + sic +'] in Quick Search Box'
        for px in proxy_list:
            if url.find(px) > 0:
                url = url.replace(px,px[:-1]+ '.ezproxy.bu.edu/')
        if s_tit.find('Encyclopedia') == 0:
            record.add('856',u=url,z=s_tit,n=s_loc,y=s_link_text,indicators=['4','0'])
        else:
            record.add('856',u=url,z=s_tit,n=s_loc,indicators=['4','0'])

        if record.has('856'):
            out.write(pymarc.record_to_xml(record ).decode("utf-8"))
            counter += 1
            try:
                print('Record: ',str(counter))
                #print(record)
            except Exception as e:
                print(e)
        else:
            no_856 += 1
            print()
            print("no 856",str(no_856))
            print(record)
out.write(footer)
out.close()


Record:  1
Record:  2
Record:  3
Record:  4
Record:  5
Record:  6
Record:  7
Record:  8
Record:  9
Record:  10
Record:  11
Record:  12
Record:  13
Record:  14
Record:  15
Record:  16
Record:  17
Record:  18
Record:  19
Record:  20
Record:  21
Record:  22
Record:  23
Record:  24
Record:  25
Record:  26
Record:  27
Record:  28
Record:  29
Record:  30
Record:  31
Record:  32
Record:  33
Record:  34
Record:  35
Record:  36
Record:  37
Record:  38
Record:  39
Record:  40
Record:  41
Record:  42
Record:  43
Record:  44
Record:  45
Record:  46
Record:  47
Record:  48
Record:  49
Record:  50
Record:  51
Record:  52
Record:  53
Record:  54
Record:  55
Record:  56
Record:  57
Record:  58
Record:  59
Record:  60
Record:  61
Record:  62
Record:  63
Record:  64
Record:  65
Record:  66
Record:  67
Record:  68
Record:  69
Record:  70
Record:  71
Record:  72
Record:  73
Record:  74
Record:  75
Record:  76
Record:  77
Record:  78
Record:  79
Record:  80
Record:  81
Record:  82
Record:  83
Record:  84
Record:  85
Record:  86
Record:  87
Record:  88
Record:  89
Record:  90
Record:  91
Record:  92
Record:  93
Record:  94
Record:  95
Record:  96
Record:  97
Record:  98
Record:  99
Record:  100
Record:  101
Record:  102
Record:  103
Record:  104
Record:  105
Record:  106
Record:  107
Record:  108
Record:  109
Record:  110
Record:  111
Record:  112
Record:  113
Record:  114
Record:  115
Record:  116
Record:  117
Record:  118
Record:  119
Record:  120
Record:  121
Record:  122
Record:  123
Record:  124
Record:  125
Record:  126
Record:  127
Record:  128
Record:  129
Record:  130
Record:  131
Record:  132
Record:  133
Record:  134
Record:  135
Record:  136
Record:  137
Record:  138
Record:  139
Record:  140
Record:  141
Record:  142
Record:  143
Record:  144
Record:  145
Record:  146
Record:  147
Record:  148
Record:  149
Record:  150
Record:  151
Record:  152
Record:  153
Record:  154
Record:  155
Record:  156
Record:  157
Record:  158
Record:  159
Record:  160
Record:  161
Record:  162
Record:  163
Record:  164
Record:  165
Record:  166
Record:  167
Record:  168
Record:  169
Record:  170
Record:  171
Record:  172
Record:  173
Record:  174
Record:  175
Record:  176
Record:  177
Record:  178
Record:  179
Record:  180
Record:  181
Record:  182
Record:  183
Record:  184
Record:  185
Record:  186
Record:  187
Record:  188
Record:  189
Record:  190
Record:  191
Record:  192
Record:  193
Record:  194
Record:  195
Record:  196
Record:  197
Record:  198
Record:  199
Record:  200
Record:  201
Record:  202
Record:  203
Record:  204
Record:  205
Record:  206
Record:  207
Record:  208
Record:  209
Record:  210
Record:  211
Record:  212
Record:  213
Record:  214
Record:  215
Record:  216
Record:  217
Record:  218
Record:  219
Record:  220
Record:  221
Record:  222
Record:  223
Record:  224
Record:  225
Record:  226
Record:  227
Record:  228
Record:  229
Record:  230
Record:  231
Record:  232
Record:  233
Record:  234
Record:  235
Record:  236
Record:  237
Record:  238
Record:  239
Record:  240
Record:  241
Record:  242
Record:  243
Record:  244
Record:  245
Record:  246
Record:  247
Record:  248
Record:  249
Record:  250
Record:  251
Record:  252
Record:  253
Record:  254
Record:  255
Record:  256
Record:  257
Record:  258
Record:  259
Record:  260
Record:  261
Record:  262
Record:  263
Record:  264
Record:  265
Record:  266
Record:  267
Record:  268
Record:  269
Record:  270
Record:  271
Record:  272
Record:  273
Record:  274
Record:  275
Record:  276
Record:  277
Record:  278
Record:  279
Record:  280
Record:  281
Record:  282
Record:  283
Record:  284
Record:  285
Record:  286
Record:  287
Record:  288
Record:  289
Record:  290
Record:  291
Record:  292
Record:  293
Record:  294
Record:  295
Record:  296
Record:  297
Record:  298
Record:  299
Record:  300
Record:  301
Record:  302
Record:  303
Record:  304
Record:  305
Record:  306
Record:  307
Record:  308
Record:  309
Record:  310
Record:  311
Record:  312
Record:  313
Record:  314
Record:  315
Record:  316
Record:  317
Record:  318
Record:  319
Record:  320
Record:  321
Record:  322
Record:  323
Record:  324
Record:  325
Record:  326
Record:  327
Record:  328
Record:  329
Record:  330
Record:  331
Record:  332
Record:  333
Record:  334
Record:  335
Record:  336
Record:  337
Record:  338
Record:  339
Record:  340
Record:  341
Record:  342
Record:  343
Record:  344
Record:  345
Record:  346
Record:  347
Record:  348
Record:  349
Record:  350
Record:  351
Record:  352
Record:  353
Record:  354
Record:  355
Record:  356
Record:  357
Record:  358
Record:  359
Record:  360
Record:  361
Record:  362
Record:  363
Record:  364
Record:  365
Record:  366
Record:  367
Record:  368
Record:  369
Record:  370
Record:  371
Record:  372
Record:  373
Record:  374
Record:  375
Record:  376
Record:  377
Record:  378
Record:  379
Record:  380
Record:  381
Record:  382
Record:  383
Record:  384
Record:  385
Record:  386
Record:  387
Record:  388
Record:  389
Record:  390
Record:  391
Record:  392
Record:  393
Record:  394
Record:  395
Record:  396
Record:  397
Record:  398
Record:  399
Record:  400
Record:  401
Record:  402
Record:  403
Record:  404
Record:  405
Record:  406
Record:  407
Record:  408
Record:  409
Record:  410
Record:  411
Record:  412
Record:  413
Record:  414
Record:  415
Record:  416
Record:  417
Record:  418
Record:  419
Record:  420
Record:  421
Record:  422
Record:  423
Record:  424
Record:  425
Record:  426
Record:  427
Record:  428
Record:  429
Record:  430
Record:  431
Record:  432
Record:  433
Record:  434
Record:  435
Record:  436
Record:  437
Record:  438
Record:  439
Record:  440
Record:  441
Record:  442
Record:  443
Record:  444
Record:  445
Record:  446
Record:  447
Record:  448
Record:  449
Record:  450
Record:  451
Record:  452
Record:  453
Record:  454
Record:  455
Record:  456
Record:  457
Record:  458
Record:  459
Record:  460
Record:  461
Record:  462
Record:  463
Record:  464
Record:  465
Record:  466
Record:  467
Record:  468
Record:  469
Record:  470
Record:  471
Record:  472
Record:  473
Record:  474
Record:  475
Record:  476
Record:  477
Record:  478
Record:  479
Record:  480
Record:  481
Record:  482
Record:  483
Record:  484
Record:  485
Record:  486
Record:  487
Record:  488
Record:  489
Record:  490
Record:  491
Record:  492
Record:  493
Record:  494
Record:  495
Record:  496
Record:  497
Record:  498
Record:  499
Record:  500
Record:  501
Record:  502
Record:  503
Record:  504
Record:  505
Record:  506
Record:  507
Record:  508
Record:  509
Record:  510
Record:  511
Record:  512
Record:  513
Record:  514
Record:  515
Record:  516
Record:  517
Record:  518
Record:  519
Record:  520
Record:  521
Record:  522
Record:  523
Record:  524
Record:  525
Record:  526
Record:  527
Record:  528
Record:  529
Record:  530
Record:  531
Record:  532
Record:  533
Record:  534
Record:  535
Record:  536
Record:  537
Record:  538
Record:  539
Record:  540
Record:  541
Record:  542
Record:  543
Record:  544
Record:  545
Record:  546
Record:  547
Record:  548
Record:  549
Record:  550
Record:  551
Record:  552
Record:  553
Record:  554
Record:  555
Record:  556
Record:  557
Record:  558
Record:  559
Record:  560
Record:  561
Record:  562
Record:  563
Record:  564
Record:  565
Record:  566
Record:  567
Record:  568
Record:  569
Record:  570
Record:  571
Record:  572
Record:  573
Record:  574
Record:  575
Record:  576
Record:  577
Record:  578
Record:  579
Record:  580
Record:  581
Record:  582
Record:  583
Record:  584
Record:  585
Record:  586
Record:  587
Record:  588
Record:  589
Record:  590
Record:  591
Record:  592
Record:  593
Record:  594
Record:  595
Record:  596
Record:  597
Record:  598
Record:  599
Record:  600
Record:  601
Record:  602
Record:  603
Record:  604
Record:  605
Record:  606
Record:  607
Record:  608
Record:  609
Record:  610
Record:  611
Record:  612
Record:  613
Record:  614
Record:  615
Record:  616
Record:  617
Record:  618
Record:  619
Record:  620
Record:  621
Record:  622
Record:  623
Record:  624
Record:  625
Record:  626
Record:  627
Record:  628
Record:  629
Record:  630
Record:  631
Record:  632
Record:  633
Record:  634
Record:  635
Record:  636
Record:  637
Record:  638
Record:  639
Record:  640
Record:  641
Record:  642
Record:  643
Record:  644
Record:  645
Record:  646
Record:  647
Record:  648
Record:  649
Record:  650
Record:  651
Record:  652
Record:  653
Record:  654
Record:  655
Record:  656
Record:  657
Record:  658
Record:  659
Record:  660
Record:  661
Record:  662
Record:  663
Record:  664
Record:  665
Record:  666
Record:  667
Record:  668
Record:  669
Record:  670
Record:  671
Record:  672
Record:  673
Record:  674
Record:  675
Record:  676
Record:  677
Record:  678
Record:  679
Record:  680
Record:  681
Record:  682
Record:  683
Record:  684
Record:  685
Record:  686
Record:  687
Record:  688
Record:  689
Record:  690
Record:  691
Record:  692
Record:  693
Record:  694
Record:  695
Record:  696
Record:  697
Record:  698
Record:  699
Record:  700
Record:  701
Record:  702
Record:  703
Record:  704
Record:  705
Record:  706
Record:  707
Record:  708
Record:  709
Record:  710
Record:  711
Record:  712
Record:  713
Record:  714
Record:  715
Record:  716
Record:  717
Record:  718
Record:  719
Record:  720
Record:  721
Record:  722
Record:  723
Record:  724
Record:  725
Record:  726
Record:  727
Record:  728
Record:  729
Record:  730
Record:  731
Record:  732
Record:  733
Record:  734
Record:  735
Record:  736
Record:  737
Record:  738
Record:  739
Record:  740
Record:  741
Record:  742
Record:  743
Record:  744
Record:  745
Record:  746
Record:  747
Record:  748
Record:  749
Record:  750
Record:  751
Record:  752
Record:  753
Record:  754
Record:  755
Record:  756
Record:  757
Record:  758
Record:  759
Record:  760
Record:  761
Record:  762
Record:  763
Record:  764
Record:  765
Record:  766
Record:  767
Record:  768
Record:  769
Record:  770
Record:  771
Record:  772
Record:  773
Record:  774
Record:  775
Record:  776
Record:  777
Record:  778
Record:  779
Record:  780
Record:  781
Record:  782
Record:  783
Record:  784
Record:  785
Record:  786
Record:  787
Record:  788
Record:  789
Record:  790
Record:  791
Record:  792
Record:  793
Record:  794
Record:  795
Record:  796
Record:  797
Record:  798
Record:  799
Record:  800
Record:  801
Record:  802
Record:  803
Record:  804
Record:  805
Record:  806
Record:  807
Record:  808
Record:  809
Record:  810
Record:  811
Record:  812
Record:  813
Record:  814
Record:  815
Record:  816
Record:  817
Record:  818
Record:  819
Record:  820
Record:  821
Record:  822
Record:  823
Record:  824
Record:  825
Record:  826
Record:  827
Record:  828
Record:  829
Record:  830
Record:  831
Record:  832
Record:  833
Record:  834
Record:  835
Record:  836
Record:  837
Record:  838
Record:  839
Record:  840
Record:  841
Record:  842
Record:  843
Record:  844
Record:  845
Record:  846
Record:  847
Record:  848
Record:  849
Record:  850
Record:  851
Record:  852
Record:  853
Record:  854
Record:  855
Record:  856
Record:  857
Record:  858
Record:  859
Record:  860
Record:  861
Record:  862
Record:  863
Record:  864
Record:  865
Record:  866
Record:  867
Record:  868
Record:  869
Record:  870
Record:  871
Record:  872
Record:  873
Record:  874
Record:  875
Record:  876
Record:  877
Record:  878
Record:  879
Record:  880
Record:  881
Record:  882
Record:  883
Record:  884
Record:  885
Record:  886
Record:  887
Record:  888
Record:  889
Record:  890
Record:  891
Record:  892
Record:  893
Record:  894
Record:  895
Record:  896
Record:  897
Record:  898
Record:  899
Record:  900
Record:  901
Record:  902
Record:  903
Record:  904
Record:  905
Record:  906
Record:  907
Record:  908
Record:  909
Record:  910
Record:  911
Record:  912
Record:  913
Record:  914
Record:  915
Record:  916
Record:  917
Record:  918
Record:  919
Record:  920
Record:  921
Record:  922
Record:  923
Record:  924
Record:  925
Record:  926
Record:  927
Record:  928
Record:  929
Record:  930
Record:  931
Record:  932
Record:  933
Record:  934
Record:  935
Record:  936
Record:  937
Record:  938
Record:  939
Record:  940
Record:  941
Record:  942
Record:  943
Record:  944
Record:  945
Record:  946
Record:  947
Record:  948
Record:  949
Record:  950
Record:  951
Record:  952
Record:  953
Record:  954
Record:  955
Record:  956
Record:  957
Record:  958
Record:  959
Record:  960
Record:  961
Record:  962
Record:  963
Record:  964
Record:  965
Record:  966
Record:  967
Record:  968
Record:  969
Record:  970
Record:  971
Record:  972
Record:  973
Record:  974
Record:  975
Record:  976
Record:  977
Record:  978
Record:  979
Record:  980
Record:  981
Record:  982
Record:  983
Record:  984
Record:  985
Record:  986
Record:  987
Record:  988
Record:  989
Record:  990
Record:  991
Record:  992
Record:  993
Record:  994
Record:  995
Record:  996
Record:  997
Record:  998
Record:  999
Record:  1000
Record:  1001
Record:  1002
Record:  1003
Record:  1004
Record:  1005
Record:  1006
Record:  1007
Record:  1008
Record:  1009
Record:  1010
Record:  1011
Record:  1012
Record:  1013
Record:  1014
Record:  1015
Record:  1016
Record:  1017
Record:  1018
Record:  1019
Record:  1020
Record:  1021
Record:  1022
Record:  1023
Record:  1024
Record:  1025
Record:  1026
Record:  1027
Record:  1028
Record:  1029
Record:  1030
Record:  1031
Record:  1032
Record:  1033
Record:  1034
Record:  1035
Record:  1036
Record:  1037
Record:  1038
Record:  1039
Record:  1040
Record:  1041
Record:  1042
Record:  1043
Record:  1044
Record:  1045
Record:  1046
Record:  1047
Record:  1048
Record:  1049
Record:  1050
Record:  1051
Record:  1052
Record:  1053
Record:  1054
Record:  1055
Record:  1056
Record:  1057
Record:  1058
Record:  1059
Record:  1060
Record:  1061
Record:  1062
Record:  1063
Record:  1064
Record:  1065
Record:  1066
Record:  1067
Record:  1068
Record:  1069
Record:  1070
Record:  1071
Record:  1072
Record:  1073
Record:  1074
Record:  1075
Record:  1076
Record:  1077
Record:  1078
Record:  1079
Record:  1080
Record:  1081
Record:  1082
Record:  1083
Record:  1084
Record:  1085
Record:  1086
Record:  1087
Record:  1088
Record:  1089
Record:  1090
Record:  1091
Record:  1092
Record:  1093
Record:  1094
Record:  1095
Record:  1096
Record:  1097
Record:  1098
Record:  1099
Record:  1100
Record:  1101
Record:  1102
Record:  1103
Record:  1104
Record:  1105
Record:  1106
Record:  1107
Record:  1108
Record:  1109
Record:  1110
Record:  1111
Record:  1112
Record:  1113
Record:  1114
Record:  1115
Record:  1116
Record:  1117
Record:  1118
Record:  1119
Record:  1120
Record:  1121
Record:  1122
Record:  1123
Record:  1124
Record:  1125
Record:  1126
Record:  1127
Record:  1128
Record:  1129
Record:  1130
Record:  1131
Record:  1132
Record:  1133
Record:  1134
Record:  1135
Record:  1136
Record:  1137
Record:  1138
Record:  1139
Record:  1140
Record:  1141
Record:  1142
Record:  1143
Record:  1144
Record:  1145
Record:  1146
Record:  1147
Record:  1148
Record:  1149
Record:  1150
Record:  1151
Record:  1152
Record:  1153
Record:  1154
Record:  1155
Record:  1156
Record:  1157
Record:  1158
Record:  1159
Record:  1160
Record:  1161
Record:  1162
Record:  1163
Record:  1164
Record:  1165
Record:  1166
Record:  1167
Record:  1168
Record:  1169
Record:  1170
Record:  1171
Record:  1172
Record:  1173
Record:  1174
Record:  1175
Record:  1176
Record:  1177
Record:  1178
Record:  1179
Record:  1180
Record:  1181
Record:  1182
Record:  1183
Record:  1184
Record:  1185
Record:  1186
Record:  1187
Record:  1188
Record:  1189
Record:  1190
Record:  1191
Record:  1192
Record:  1193
Record:  1194
Record:  1195
Record:  1196
Record:  1197
Record:  1198
Record:  1199
Record:  1200
Record:  1201
Record:  1202
Record:  1203
Record:  1204
Record:  1205
Record:  1206
Record:  1207
Record:  1208
Record:  1209
Record:  1210
Record:  1211
Record:  1212
Record:  1213
Record:  1214
Record:  1215
Record:  1216
Record:  1217
Record:  1218
Record:  1219
Record:  1220
Record:  1221
Record:  1222
Record:  1223
Record:  1224
Record:  1225
Record:  1226
Record:  1227
Record:  1228
Record:  1229
Record:  1230
Record:  1231
Record:  1232
Record:  1233
Record:  1234
Record:  1235
Record:  1236
Record:  1237
Record:  1238
Record:  1239
Record:  1240
Record:  1241
Record:  1242
Record:  1243
Record:  1244
Record:  1245
Record:  1246
Record:  1247
Record:  1248
Record:  1249
Record:  1250
Record:  1251
Record:  1252
Record:  1253
Record:  1254
Record:  1255
Record:  1256
Record:  1257
Record:  1258
Record:  1259
Record:  1260
Record:  1261
Record:  1262
Record:  1263
Record:  1264
Record:  1265
Record:  1266
Record:  1267
Record:  1268
Record:  1269
Record:  1270
Record:  1271
Record:  1272
Record:  1273
Record:  1274
Record:  1275
Record:  1276
Record:  1277
Record:  1278
Record:  1279
Record:  1280
Record:  1281
Record:  1282
Record:  1283
Record:  1284
Record:  1285
Record:  1286
Record:  1287
Record:  1288
Record:  1289
Record:  1290
Record:  1291
Record:  1292
Record:  1293
Record:  1294
Record:  1295
Record:  1296
Record:  1297
Record:  1298
Record:  1299
Record:  1300
Record:  1301
Record:  1302
Record:  1303
Record:  1304
Record:  1305
Record:  1306
Record:  1307
Record:  1308
Record:  1309
Record:  1310
Record:  1311
Record:  1312
Record:  1313
Record:  1314
Record:  1315
Record:  1316
Record:  1317
Record:  1318
Record:  1319
Record:  1320
Record:  1321
Record:  1322
Record:  1323
Record:  1324
Record:  1325
Record:  1326
Record:  1327
Record:  1328
Record:  1329
Record:  1330
Record:  1331
Record:  1332
Record:  1333
Record:  1334
Record:  1335
Record:  1336
Record:  1337
Record:  1338
Record:  1339
Record:  1340
Record:  1341
Record:  1342
Record:  1343
Record:  1344
Record:  1345
Record:  1346
Record:  1347
Record:  1348
Record:  1349
Record:  1350
Record:  1351
Record:  1352
Record:  1353
Record:  1354
Record:  1355
Record:  1356
Record:  1357
Record:  1358
Record:  1359
Record:  1360
Record:  1361
Record:  1362
Record:  1363
Record:  1364
Record:  1365
Record:  1366
Record:  1367
Record:  1368
Record:  1369
Record:  1370
Record:  1371
Record:  1372
Record:  1373
Record:  1374
Record:  1375
Record:  1376
Record:  1377
Record:  1378
Record:  1379
Record:  1380
Record:  1381
Record:  1382
Record:  1383
Record:  1384
Record:  1385
Record:  1386
Record:  1387
Record:  1388
Record:  1389
Record:  1390
Record:  1391
Record:  1392
Record:  1393
Record:  1394
Record:  1395
Record:  1396
Record:  1397
Record:  1398
Record:  1399
Record:  1400
Record:  1401
Record:  1402
Record:  1403
Record:  1404
Record:  1405
Record:  1406
Record:  1407
Record:  1408
Record:  1409
Record:  1410
Record:  1411
Record:  1412
Record:  1413
Record:  1414
Record:  1415
Record:  1416
Record:  1417
Record:  1418
Record:  1419
Record:  1420
Record:  1421
Record:  1422
Record:  1423
Record:  1424
Record:  1425
Record:  1426
Record:  1427
Record:  1428
Record:  1429
Record:  1430
Record:  1431
Record:  1432
Record:  1433
Record:  1434
Record:  1435
Record:  1436
Record:  1437
Record:  1438
Record:  1439
Record:  1440
Record:  1441
Record:  1442
Record:  1443
Record:  1444
Record:  1445
Record:  1446
Record:  1447
Record:  1448
Record:  1449
Record:  1450
Record:  1451
Record:  1452
Record:  1453
Record:  1454
Record:  1455
Record:  1456
Record:  1457
Record:  1458
Record:  1459
Record:  1460
Record:  1461
Record:  1462
Record:  1463
Record:  1464
Record:  1465
Record:  1466
Record:  1467
Record:  1468
Record:  1469
Record:  1470
Record:  1471
Record:  1472
Record:  1473
Record:  1474
Record:  1475
Record:  1476
Record:  1477
Record:  1478
Record:  1479
Record:  1480
Record:  1481
Record:  1482
Record:  1483
Record:  1484
Record:  1485
Record:  1486
Record:  1487
Record:  1488
Record:  1489
Record:  1490
Record:  1491
Record:  1492
Record:  1493
Record:  1494
Record:  1495
Record:  1496
Record:  1497
Record:  1498
Record:  1499
Record:  1500
Record:  1501
Record:  1502
Record:  1503
Record:  1504
Record:  1505
Record:  1506
Record:  1507
Record:  1508
Record:  1509
Record:  1510
Record:  1511
Record:  1512
Record:  1513
Record:  1514
Record:  1515
Record:  1516
Record:  1517
Record:  1518
Record:  1519
Record:  1520
Record:  1521
Record:  1522
Record:  1523
Record:  1524
Record:  1525
Record:  1526
Record:  1527
Record:  1528
Record:  1529
Record:  1530
Record:  1531
Record:  1532
Record:  1533
Record:  1534
Record:  1535
Record:  1536
Record:  1537
Record:  1538
Record:  1539
Record:  1540
Record:  1541
Record:  1542
Record:  1543
Record:  1544
Record:  1545
Record:  1546
Record:  1547
Record:  1548
Record:  1549
Record:  1550
Record:  1551
Record:  1552
Record:  1553
Record:  1554
Record:  1555
Record:  1556
Record:  1557
Record:  1558
Record:  1559
Record:  1560
Record:  1561
Record:  1562
Record:  1563
Record:  1564
Record:  1565
Record:  1566
Record:  1567
Record:  1568
Record:  1569
Record:  1570
Record:  1571
Record:  1572
Record:  1573
Record:  1574
Record:  1575
Record:  1576
Record:  1577
Record:  1578
Record:  1579
Record:  1580
Record:  1581
Record:  1582
Record:  1583
Record:  1584
Record:  1585
Record:  1586
Record:  1587
Record:  1588
Record:  1589
Record:  1590
Record:  1591
Record:  1592
Record:  1593
Record:  1594
Record:  1595
Record:  1596
Record:  1597
Record:  1598
Record:  1599
Record:  1600
Record:  1601
Record:  1602
Record:  1603
Record:  1604
Record:  1605
Record:  1606
Record:  1607
Record:  1608
Record:  1609
Record:  1610
Record:  1611
Record:  1612
Record:  1613
Record:  1614
Record:  1615
Record:  1616
Record:  1617
Record:  1618
Record:  1619
Record:  1620
Record:  1621
Record:  1622
Record:  1623
Record:  1624
Record:  1625
Record:  1626
Record:  1627
Record:  1628
Record:  1629
Record:  1630
Record:  1631
Record:  1632
Record:  1633
Record:  1634
Record:  1635
Record:  1636
Record:  1637
Record:  1638
Record:  1639
Record:  1640
Record:  1641
Record:  1642
Record:  1643
Record:  1644
Record:  1645
Record:  1646
Record:  1647
Record:  1648
Record:  1649
Record:  1650
Record:  1651
Record:  1652
Record:  1653
Record:  1654
Record:  1655
Record:  1656
Record:  1657
Record:  1658
Record:  1659
Record:  1660
Record:  1661
Record:  1662
Record:  1663
Record:  1664
Record:  1665
Record:  1666
Record:  1667
Record:  1668
Record:  1669
Record:  1670
Record:  1671
Record:  1672
Record:  1673
Record:  1674
Record:  1675
Record:  1676
Record:  1677
Record:  1678
Record:  1679
Record:  1680
Record:  1681
Record:  1682
Record:  1683
Record:  1684
Record:  1685
Record:  1686
Record:  1687
Record:  1688
Record:  1689
Record:  1690
Record:  1691
Record:  1692
Record:  1693
Record:  1694
Record:  1695
Record:  1696
Record:  1697
Record:  1698
Record:  1699
Record:  1700
Record:  1701
Record:  1702
Record:  1703
Record:  1704
Record:  1705
Record:  1706
Record:  1707
Record:  1708
Record:  1709
Record:  1710
Record:  1711
Record:  1712
Record:  1713
Record:  1714
Record:  1715
Record:  1716
Record:  1717
Record:  1718
Record:  1719
Record:  1720
Record:  1721
Record:  1722
Record:  1723
Record:  1724
Record:  1725
Record:  1726
Record:  1727
Record:  1728
Record:  1729
Record:  1730
Record:  1731
Record:  1732
Record:  1733
Record:  1734
Record:  1735
Record:  1736
Record:  1737
Record:  1738
Record:  1739
Record:  1740
Record:  1741
Record:  1742
Record:  1743
Record:  1744
Record:  1745
Record:  1746
Record:  1747
Record:  1748
Record:  1749
Record:  1750
Record:  1751
Record:  1752
Record:  1753
Record:  1754
Record:  1755
Record:  1756
Record:  1757
Record:  1758
Record:  1759
Record:  1760
Record:  1761
Record:  1762
Record:  1763
Record:  1764
Record:  1765
Record:  1766
Record:  1767
Record:  1768
Record:  1769
Record:  1770
Record:  1771
Record:  1772
Record:  1773
Record:  1774
Record:  1775
Record:  1776
Record:  1777
Record:  1778
Record:  1779
Record:  1780
Record:  1781
Record:  1782
Record:  1783
Record:  1784
Record:  1785
Record:  1786
Record:  1787
Record:  1788
Record:  1789
Record:  1790
Record:  1791
Record:  1792
Record:  1793
Record:  1794
Record:  1795
Record:  1796
Record:  1797
Record:  1798
Record:  1799
Record:  1800
Record:  1801
Record:  1802
Record:  1803
Record:  1804
Record:  1805
Record:  1806
Record:  1807
Record:  1808
Record:  1809
Record:  1810
Record:  1811
Record:  1812
Record:  1813
Record:  1814
Record:  1815
Record:  1816
Record:  1817
Record:  1818
Record:  1819
Record:  1820
Record:  1821
Record:  1822
Record:  1823
Record:  1824
Record:  1825
Record:  1826
Record:  1827
Record:  1828
Record:  1829
Record:  1830
Record:  1831
Record:  1832
Record:  1833
Record:  1834
Record:  1835
Record:  1836
Record:  1837
Record:  1838
Record:  1839
Record:  1840
Record:  1841
Record:  1842
Record:  1843
Record:  1844
Record:  1845
Record:  1846
Record:  1847
Record:  1848
Record:  1849
Record:  1850
Record:  1851
Record:  1852
Record:  1853
Record:  1854
Record:  1855
Record:  1856
Record:  1857
Record:  1858
Record:  1859
Record:  1860
Record:  1861
Record:  1862
Record:  1863
Record:  1864
Record:  1865
Record:  1866
Record:  1867
Record:  1868
Record:  1869
Record:  1870
Record:  1871
Record:  1872
Record:  1873
Record:  1874
Record:  1875
Record:  1876
Record:  1877
Record:  1878
Record:  1879
Record:  1880
Record:  1881
Record:  1882
Record:  1883
Record:  1884
Record:  1885
Record:  1886
Record:  1887
Record:  1888
Record:  1889
Record:  1890
Record:  1891
Record:  1892
Record:  1893
Record:  1894
Record:  1895
Record:  1896
Record:  1897
Record:  1898
Record:  1899
Record:  1900
Record:  1901
Record:  1902
Record:  1903
Record:  1904
Record:  1905
Record:  1906
Record:  1907
Record:  1908
Record:  1909
Record:  1910
Record:  1911
Record:  1912
Record:  1913
Record:  1914
Record:  1915
Record:  1916
Record:  1917
Record:  1918
Record:  1919
Record:  1920
Record:  1921
Record:  1922
Record:  1923
Record:  1924
Record:  1925
Record:  1926
Record:  1927
Record:  1928
Record:  1929
Record:  1930
Record:  1931
Record:  1932
Record:  1933
Record:  1934
Record:  1935
Record:  1936
Record:  1937
Record:  1938
Record:  1939
Record:  1940
Record:  1941
Record:  1942
Record:  1943
Record:  1944
Record:  1945
Record:  1946
Record:  1947
Record:  1948
Record:  1949
Record:  1950
Record:  1951
Record:  1952
Record:  1953
Record:  1954
Record:  1955
Record:  1956
Record:  1957
Record:  1958
Record:  1959
Record:  1960
Record:  1961
Record:  1962
Record:  1963
Record:  1964
Record:  1965
Record:  1966
Record:  1967
Record:  1968
Record:  1969
Record:  1970
Record:  1971
Record:  1972
Record:  1973
Record:  1974
Record:  1975
Record:  1976
Record:  1977
Record:  1978
Record:  1979
Record:  1980
Record:  1981
Record:  1982
Record:  1983
Record:  1984
Record:  1985
Record:  1986
Record:  1987
Record:  1988
Record:  1989
Record:  1990
Record:  1991
Record:  1992
Record:  1993
Record:  1994
Record:  1995
Record:  1996
Record:  1997
Record:  1998
Record:  1999
Record:  2000
Record:  2001
Record:  2002
Record:  2003
Record:  2004
Record:  2005
Record:  2006
Record:  2007
Record:  2008
Record:  2009
Record:  2010
Record:  2011
Record:  2012
Record:  2013
Record:  2014
Record:  2015
Record:  2016
Record:  2017
Record:  2018
Record:  2019
Record:  2020
Record:  2021
Record:  2022
Record:  2023
Record:  2024
Record:  2025
Record:  2026
Record:  2027
Record:  2028
Record:  2029
Record:  2030
Record:  2031
Record:  2032
Record:  2033
Record:  2034
Record:  2035
Record:  2036
Record:  2037
Record:  2038
Record:  2039
Record:  2040
Record:  2041
Record:  2042
Record:  2043
Record:  2044
Record:  2045
Record:  2046
Record:  2047
Record:  2048
Record:  2049
Record:  2050
Record:  2051
Record:  2052
Record:  2053
Record:  2054
Record:  2055
Record:  2056
Record:  2057
Record:  2058
Record:  2059
Record:  2060
Record:  2061
Record:  2062
Record:  2063
Record:  2064
Record:  2065
Record:  2066
Record:  2067
Record:  2068
Record:  2069
Record:  2070
Record:  2071
Record:  2072
Record:  2073
Record:  2074
Record:  2075
Record:  2076
Record:  2077
Record:  2078
Record:  2079
Record:  2080
Record:  2081
Record:  2082
Record:  2083
Record:  2084
Record:  2085
Record:  2086
Record:  2087
Record:  2088
Record:  2089
Record:  2090
Record:  2091
Record:  2092
Record:  2093
Record:  2094
Record:  2095
Record:  2096
Record:  2097
Record:  2098
Record:  2099
Record:  2100
Record:  2101
Record:  2102
Record:  2103
Record:  2104
Record:  2105
Record:  2106
Record:  2107
Record:  2108
Record:  2109
Record:  2110
Record:  2111
Record:  2112
Record:  2113
Record:  2114
Record:  2115
Record:  2116
Record:  2117
Record:  2118
Record:  2119
Record:  2120
Record:  2121
Record:  2122
Record:  2123
Record:  2124
Record:  2125
Record:  2126
Record:  2127
Record:  2128
Record:  2129
Record:  2130
Record:  2131
Record:  2132
Record:  2133
Record:  2134
Record:  2135
Record:  2136
Record:  2137
Record:  2138
Record:  2139
Record:  2140
Record:  2141
Record:  2142
Record:  2143
Record:  2144
Record:  2145
Record:  2146
Record:  2147
Record:  2148
Record:  2149
Record:  2150
Record:  2151
Record:  2152
Record:  2153
Record:  2154
Record:  2155
Record:  2156
Record:  2157
Record:  2158
Record:  2159
Record:  2160
Record:  2161
Record:  2162
Record:  2163
Record:  2164
Record:  2165
Record:  2166
Record:  2167
Record:  2168
Record:  2169
Record:  2170
Record:  2171
Record:  2172
Record:  2173
Record:  2174
Record:  2175
Record:  2176
Record:  2177
Record:  2178
Record:  2179
Record:  2180
Record:  2181
Record:  2182
Record:  2183
Record:  2184
Record:  2185
Record:  2186
Record:  2187
Record:  2188
Record:  2189
Record:  2190
Record:  2191
Record:  2192
Record:  2193
Record:  2194
Record:  2195
Record:  2196
Record:  2197
Record:  2198
Record:  2199
Record:  2200
Record:  2201
Record:  2202
Record:  2203
Record:  2204
Record:  2205
Record:  2206
Record:  2207
Record:  2208
Record:  2209
Record:  2210
Record:  2211
Record:  2212
Record:  2213
Record:  2214
Record:  2215
Record:  2216
Record:  2217
Record:  2218
Record:  2219
Record:  2220
Record:  2221
Record:  2222
Record:  2223
Record:  2224
Record:  2225
Record:  2226
Record:  2227
Record:  2228
Record:  2229
Record:  2230
Record:  2231
Record:  2232
Record:  2233
Record:  2234
Record:  2235
Record:  2236
Record:  2237
Record:  2238
Record:  2239
Record:  2240
Record:  2241
Record:  2242
Record:  2243
Record:  2244
Record:  2245
Record:  2246
Record:  2247
Record:  2248
Record:  2249
Record:  2250
Record:  2251
Record:  2252
Record:  2253
Record:  2254
Record:  2255
Record:  2256
Record:  2257
Record:  2258
Record:  2259
Record:  2260
Record:  2261
Record:  2262
Record:  2263
Record:  2264
Record:  2265
Record:  2266
Record:  2267
Record:  2268
Record:  2269
Record:  2270
Record:  2271
Record:  2272
Record:  2273
Record:  2274
Record:  2275
Record:  2276
Record:  2277
Record:  2278
Record:  2279
Record:  2280
Record:  2281
Record:  2282
Record:  2283
Record:  2284
Record:  2285
Record:  2286
Record:  2287
Record:  2288
Record:  2289
Record:  2290
Record:  2291
Record:  2292
Record:  2293
Record:  2294
Record:  2295
Record:  2296
Record:  2297
Record:  2298
Record:  2299
Record:  2300
Record:  2301
Record:  2302
Record:  2303
Record:  2304
Record:  2305
Record:  2306
Record:  2307
Record:  2308
Record:  2309
Record:  2310
Record:  2311
Record:  2312
Record:  2313
Record:  2314
Record:  2315
Record:  2316
Record:  2317
Record:  2318
Record:  2319
Record:  2320
Record:  2321
Record:  2322
Record:  2323
Record:  2324
Record:  2325
Record:  2326
Record:  2327
Record:  2328
Record:  2329
Record:  2330
Record:  2331
Record:  2332
Record:  2333
Record:  2334
Record:  2335
Record:  2336
Record:  2337
Record:  2338
Record:  2339
Record:  2340
Record:  2341
Record:  2342
Record:  2343
Record:  2344
Record:  2345
Record:  2346
Record:  2347
Record:  2348
Record:  2349
Record:  2350
Record:  2351
Record:  2352
Record:  2353
Record:  2354
Record:  2355
Record:  2356
Record:  2357
Record:  2358
Record:  2359
Record:  2360
Record:  2361
Record:  2362
Record:  2363
Record:  2364
Record:  2365
Record:  2366
Record:  2367
Record:  2368
Record:  2369
Record:  2370
Record:  2371
Record:  2372
Record:  2373
Record:  2374
Record:  2375
Record:  2376
Record:  2377
Record:  2378
Record:  2379
Record:  2380
Record:  2381
Record:  2382
Record:  2383
Record:  2384
Record:  2385
Record:  2386
Record:  2387
Record:  2388
Record:  2389
Record:  2390
Record:  2391
Record:  2392
Record:  2393
Record:  2394
Record:  2395
Record:  2396
Record:  2397
Record:  2398
Record:  2399
Record:  2400
Record:  2401
Record:  2402
Record:  2403
Record:  2404
Record:  2405
Record:  2406
Record:  2407
Record:  2408
Record:  2409
Record:  2410
Record:  2411
Record:  2412
Record:  2413
Record:  2414
Record:  2415
Record:  2416
Record:  2417
Record:  2418
Record:  2419
Record:  2420
Record:  2421
Record:  2422
Record:  2423
Record:  2424
Record:  2425
Record:  2426
Record:  2427
Record:  2428
Record:  2429
Record:  2430
Record:  2431
Record:  2432
Record:  2433
Record:  2434
Record:  2435
Record:  2436
Record:  2437
Record:  2438
Record:  2439
Record:  2440
Record:  2441
Record:  2442
Record:  2443
Record:  2444
Record:  2445
Record:  2446
Record:  2447
Record:  2448
Record:  2449
Record:  2450
Record:  2451
Record:  2452
Record:  2453
Record:  2454
Record:  2455
Record:  2456
Record:  2457
Record:  2458
Record:  2459
Record:  2460
Record:  2461
Record:  2462
Record:  2463
Record:  2464
Record:  2465
Record:  2466
Record:  2467
Record:  2468
Record:  2469
Record:  2470
Record:  2471
Record:  2472
Record:  2473
Record:  2474
Record:  2475
Record:  2476
Record:  2477
Record:  2478
Record:  2479
Record:  2480
Record:  2481
Record:  2482
Record:  2483
Record:  2484
Record:  2485
Record:  2486
Record:  2487
Record:  2488
Record:  2489
Record:  2490
Record:  2491
Record:  2492
Record:  2493
Record:  2494
Record:  2495
Record:  2496
Record:  2497
Record:  2498
Record:  2499
Record:  2500
Record:  2501
Record:  2502
Record:  2503
Record:  2504
Record:  2505
Record:  2506
Record:  2507
Record:  2508
Record:  2509
Record:  2510
Record:  2511
Record:  2512
Record:  2513
Record:  2514
Record:  2515
Record:  2516
Record:  2517
Record:  2518
Record:  2519
Record:  2520
Record:  2521
Record:  2522
Record:  2523
Record:  2524
Record:  2525
Record:  2526
Record:  2527
Record:  2528
Record:  2529
Record:  2530
Record:  2531
Record:  2532
Record:  2533
Record:  2534
Record:  2535
Record:  2536
Record:  2537
Record:  2538
Record:  2539
Record:  2540
Record:  2541
Record:  2542
Record:  2543
Record:  2544
Record:  2545
Record:  2546
Record:  2547
Record:  2548
Record:  2549
Record:  2550
Record:  2551
Record:  2552
Record:  2553
Record:  2554
Record:  2555
Record:  2556
Record:  2557
Record:  2558
Record:  2559
Record:  2560
Record:  2561
Record:  2562
Record:  2563
Record:  2564
Record:  2565
Record:  2566
Record:  2567
Record:  2568
Record:  2569
Record:  2570
Record:  2571
Record:  2572
Record:  2573
Record:  2574
Record:  2575
Record:  2576
Record:  2577
Record:  2578
Record:  2579
Record:  2580
Record:  2581
Record:  2582
Record:  2583
Record:  2584
Record:  2585
Record:  2586
Record:  2587
Record:  2588
Record:  2589
Record:  2590
Record:  2591
Record:  2592
Record:  2593
Record:  2594
Record:  2595
Record:  2596
Record:  2597
Record:  2598
Record:  2599
Record:  2600
Record:  2601
Record:  2602
Record:  2603
Record:  2604
Record:  2605
Record:  2606
Record:  2607
Record:  2608
Record:  2609
Record:  2610
Record:  2611
Record:  2612
Record:  2613
Record:  2614
Record:  2615
Record:  2616
Record:  2617
Record:  2618
Record:  2619
Record:  2620
Record:  2621
Record:  2622
Record:  2623
Record:  2624
Record:  2625
Record:  2626
Record:  2627
Record:  2628
Record:  2629
Record:  2630
Record:  2631
Record:  2632
Record:  2633
Record:  2634
Record:  2635
Record:  2636
Record:  2637
Record:  2638
Record:  2639
Record:  2640
Record:  2641
Record:  2642
Record:  2643
Record:  2644
Record:  2645
Record:  2646
Record:  2647
Record:  2648
Record:  2649
Record:  2650
Record:  2651
Record:  2652
Record:  2653
Record:  2654
Record:  2655
Record:  2656
Record:  2657
Record:  2658
Record:  2659
Record:  2660
Record:  2661
Record:  2662
Record:  2663
Record:  2664
Record:  2665
Record:  2666
Record:  2667
Record:  2668
Record:  2669
Record:  2670
Record:  2671
Record:  2672
Record:  2673
Record:  2674
Record:  2675
Record:  2676
Record:  2677
Record:  2678
Record:  2679
Record:  2680
Record:  2681
Record:  2682
Record:  2683
Record:  2684
Record:  2685
Record:  2686
Record:  2687
Record:  2688
Record:  2689
Record:  2690
Record:  2691
Record:  2692
Record:  2693
Record:  2694
Record:  2695
Record:  2696
Record:  2697
Record:  2698
Record:  2699
Record:  2700
Record:  2701
Record:  2702
Record:  2703
Record:  2704
Record:  2705
Record:  2706
Record:  2707
Record:  2708
Record:  2709
Record:  2710
Record:  2711
Record:  2712
Record:  2713
Record:  2714
Record:  2715
Record:  2716
Record:  2717
Record:  2718
Record:  2719
Record:  2720
Record:  2721
Record:  2722
Record:  2723
Record:  2724
Record:  2725
Record:  2726
Record:  2727
Record:  2728
Record:  2729
Record:  2730
Record:  2731
Record:  2732
Record:  2733
Record:  2734
Record:  2735
Record:  2736
Record:  2737
Record:  2738
Record:  2739
Record:  2740
Record:  2741
Record:  2742
Record:  2743
Record:  2744
Record:  2745
Record:  2746
Record:  2747
Record:  2748
Record:  2749
Record:  2750
Record:  2751
Record:  2752
Record:  2753
Record:  2754
Record:  2755
Record:  2756
Record:  2757
Record:  2758
Record:  2759
Record:  2760
Record:  2761
Record:  2762
Record:  2763
Record:  2764
Record:  2765
Record:  2766
Record:  2767
Record:  2768
Record:  2769
Record:  2770
Record:  2771
Record:  2772
Record:  2773
Record:  2774
Record:  2775
Record:  2776
Record:  2777
Record:  2778
Record:  2779
Record:  2780
Record:  2781
Record:  2782
Record:  2783
Record:  2784
Record:  2785
Record:  2786
Record:  2787
Record:  2788
Record:  2789
Record:  2790
Record:  2791
Record:  2792
Record:  2793
Record:  2794
Record:  2795
Record:  2796
Record:  2797
Record:  2798
Record:  2799
Record:  2800
Record:  2801
Record:  2802
Record:  2803
Record:  2804
Record:  2805
Record:  2806
Record:  2807
Record:  2808
Record:  2809
Record:  2810
Record:  2811
Record:  2812
Record:  2813
Record:  2814
Record:  2815
Record:  2816
Record:  2817
Record:  2818
Record:  2819
Record:  2820
Record:  2821
Record:  2822
Record:  2823
Record:  2824
Record:  2825
Record:  2826
Record:  2827
Record:  2828
Record:  2829
Record:  2830
Record:  2831
Record:  2832
Record:  2833
Record:  2834
Record:  2835
Record:  2836
Record:  2837
Record:  2838
Record:  2839
Record:  2840
Record:  2841
Record:  2842
Record:  2843
Record:  2844
Record:  2845
Record:  2846
Record:  2847
Record:  2848
Record:  2849
Record:  2850
Record:  2851
Record:  2852
Record:  2853
Record:  2854
Record:  2855
Record:  2856
Record:  2857
Record:  2858
Record:  2859
Record:  2860
Record:  2861
Record:  2862
Record:  2863
Record:  2864
Record:  2865
Record:  2866
Record:  2867
Record:  2868
Record:  2869
Record:  2870
Record:  2871
Record:  2872
Record:  2873
Record:  2874
Record:  2875
Record:  2876
Record:  2877
Record:  2878
Record:  2879
Record:  2880
Record:  2881
Record:  2882
Record:  2883
Record:  2884
Record:  2885
Record:  2886
Record:  2887
Record:  2888
Record:  2889
Record:  2890
Record:  2891
Record:  2892
Record:  2893
Record:  2894
Record:  2895
Record:  2896
Record:  2897
Record:  2898
Record:  2899
Record:  2900
Record:  2901
Record:  2902
Record:  2903
Record:  2904
Record:  2905
Record:  2906
Record:  2907
Record:  2908
Record:  2909
Record:  2910
Record:  2911
Record:  2912
Record:  2913
Record:  2914
Record:  2915
Record:  2916
Record:  2917
Record:  2918
Record:  2919
Record:  2920
Record:  2921
Record:  2922
Record:  2923
Record:  2924
Record:  2925
Record:  2926
Record:  2927
Record:  2928
Record:  2929
Record:  2930
Record:  2931
Record:  2932
Record:  2933
Record:  2934
Record:  2935
Record:  2936
Record:  2937
Record:  2938
Record:  2939
Record:  2940
Record:  2941
Record:  2942
Record:  2943
Record:  2944
Record:  2945
Record:  2946
Record:  2947
Record:  2948
Record:  2949
Record:  2950
Record:  2951
Record:  2952
Record:  2953
Record:  2954
Record:  2955
Record:  2956
Record:  2957
Record:  2958
Record:  2959
Record:  2960
Record:  2961
Record:  2962
Record:  2963
Record:  2964
Record:  2965
Record:  2966
Record:  2967
Record:  2968
Record:  2969
Record:  2970
Record:  2971
Record:  2972
Record:  2973
Record:  2974
Record:  2975
Record:  2976
Record:  2977
Record:  2978
Record:  2979
Record:  2980
Record:  2981
Record:  2982
Record:  2983
Record:  2984
Record:  2985
Record:  2986
Record:  2987
Record:  2988
Record:  2989
Record:  2990
Record:  2991
Record:  2992
Record:  2993
Record:  2994
Record:  2995
Record:  2996
Record:  2997
Record:  2998
Record:  2999
Record:  3000
Record:  3001
Record:  3002
Record:  3003
Record:  3004
Record:  3005
Record:  3006
Record:  3007
Record:  3008
Record:  3009
Record:  3010
Record:  3011
Record:  3012
Record:  3013
Record:  3014
Record:  3015
Record:  3016
Record:  3017
Record:  3018
Record:  3019
Record:  3020
Record:  3021
Record:  3022
Record:  3023
Record:  3024
Record:  3025
Record:  3026
Record:  3027
Record:  3028
Record:  3029
Record:  3030
Record:  3031
Record:  3032
Record:  3033
Record:  3034
Record:  3035
Record:  3036
Record:  3037
Record:  3038
Record:  3039
Record:  3040
Record:  3041
Record:  3042
Record:  3043
Record:  3044
Record:  3045
Record:  3046
Record:  3047
Record:  3048
Record:  3049
Record:  3050
Record:  3051
Record:  3052
Record:  3053
Record:  3054
Record:  3055
Record:  3056
Record:  3057
Record:  3058
Record:  3059
Record:  3060
Record:  3061
Record:  3062
Record:  3063
Record:  3064
Record:  3065
Record:  3066
Record:  3067
Record:  3068
Record:  3069
Record:  3070
Record:  3071
Record:  3072
Record:  3073
Record:  3074
Record:  3075
Record:  3076
Record:  3077
Record:  3078
Record:  3079
Record:  3080
Record:  3081
Record:  3082
Record:  3083
Record:  3084
Record:  3085
Record:  3086
Record:  3087
Record:  3088
Record:  3089
Record:  3090
Record:  3091
Record:  3092
Record:  3093
Record:  3094
Record:  3095
Record:  3096
Record:  3097
Record:  3098
Record:  3099
Record:  3100
Record:  3101
Record:  3102
Record:  3103
Record:  3104
Record:  3105
Record:  3106
Record:  3107
Record:  3108
Record:  3109
Record:  3110
Record:  3111
Record:  3112
Record:  3113
Record:  3114
Record:  3115
Record:  3116
Record:  3117
Record:  3118
Record:  3119
Record:  3120
Record:  3121
Record:  3122
Record:  3123
Record:  3124
Record:  3125
Record:  3126
Record:  3127
Record:  3128
Record:  3129
Record:  3130
Record:  3131
Record:  3132
Record:  3133
Record:  3134
Record:  3135
Record:  3136
Record:  3137
Record:  3138
Record:  3139
Record:  3140
Record:  3141
Record:  3142
Record:  3143
Record:  3144
Record:  3145
Record:  3146
Record:  3147
Record:  3148
Record:  3149
Record:  3150
Record:  3151
Record:  3152
Record:  3153
Record:  3154
Record:  3155
Record:  3156
Record:  3157
Record:  3158
Record:  3159
Record:  3160
Record:  3161
Record:  3162
Record:  3163
Record:  3164
Record:  3165
Record:  3166
Record:  3167
Record:  3168
Record:  3169
Record:  3170
Record:  3171
Record:  3172
Record:  3173
Record:  3174
Record:  3175
Record:  3176
Record:  3177
Record:  3178
Record:  3179
Record:  3180
Record:  3181
Record:  3182
Record:  3183
Record:  3184
Record:  3185
Record:  3186
Record:  3187
Record:  3188
Record:  3189
Record:  3190
Record:  3191
Record:  3192
Record:  3193
Record:  3194
Record:  3195
Record:  3196
Record:  3197
Record:  3198
Record:  3199
Record:  3200
Record:  3201
Record:  3202
Record:  3203
Record:  3204
Record:  3205
Record:  3206
Record:  3207
Record:  3208
Record:  3209
Record:  3210
Record:  3211
Record:  3212
Record:  3213
Record:  3214
Record:  3215
Record:  3216
Record:  3217
Record:  3218
Record:  3219
Record:  3220
Record:  3221
Record:  3222
Record:  3223
Record:  3224
Record:  3225
Record:  3226
Record:  3227
Record:  3228
Record:  3229
Record:  3230
Record:  3231
Record:  3232
Record:  3233
Record:  3234
Record:  3235
Record:  3236
Record:  3237
Record:  3238
Record:  3239
Record:  3240
Record:  3241
Record:  3242
Record:  3243
Record:  3244
Record:  3245
Record:  3246
Record:  3247
Record:  3248
Record:  3249
Record:  3250
Record:  3251
Record:  3252
Record:  3253
Record:  3254
Record:  3255
Record:  3256
Record:  3257
Record:  3258
Record:  3259
Record:  3260
Record:  3261
Record:  3262
Record:  3263
Record:  3264
Record:  3265
Record:  3266
Record:  3267
Record:  3268
Record:  3269
Record:  3270
Record:  3271
Record:  3272
Record:  3273
Record:  3274
Record:  3275
Record:  3276
Record:  3277
Record:  3278
Record:  3279
Record:  3280
Record:  3281
Record:  3282
Record:  3283
Record:  3284
Record:  3285
Record:  3286
Record:  3287
Record:  3288
Record:  3289
Record:  3290
Record:  3291
Record:  3292
Record:  3293
Record:  3294
Record:  3295
Record:  3296
Record:  3297
Record:  3298
Record:  3299
Record:  3300
Record:  3301
Record:  3302
Record:  3303
Record:  3304
Record:  3305
Record:  3306
Record:  3307
Record:  3308
Record:  3309
Record:  3310
Record:  3311
Record:  3312
Record:  3313
Record:  3314
Record:  3315
Record:  3316
Record:  3317
Record:  3318
Record:  3319
Record:  3320
Record:  3321
Record:  3322
Record:  3323
Record:  3324
Record:  3325
Record:  3326
Record:  3327
Record:  3328
Record:  3329
Record:  3330
Record:  3331
Record:  3332
Record:  3333
Record:  3334
Record:  3335
Record:  3336
Record:  3337
Record:  3338
Record:  3339
Record:  3340
Record:  3341
Record:  3342
Record:  3343
Record:  3344
Record:  3345
Record:  3346
Record:  3347
Record:  3348
Record:  3349
Record:  3350
Record:  3351
Record:  3352
Record:  3353
Record:  3354
Record:  3355
Record:  3356
Record:  3357
Record:  3358
Record:  3359
Record:  3360
Record:  3361
Record:  3362
Record:  3363
Record:  3364
Record:  3365
Record:  3366
Record:  3367
Record:  3368
Record:  3369
Record:  3370
Record:  3371
Record:  3372
Record:  3373
Record:  3374
Record:  3375
Record:  3376
Record:  3377
Record:  3378
Record:  3379
Record:  3380
Record:  3381
Record:  3382
Record:  3383
Record:  3384
Record:  3385
Record:  3386
Record:  3387
Record:  3388
Record:  3389
Record:  3390
Record:  3391
Record:  3392
Record:  3393
Record:  3394
Record:  3395
Record:  3396
Record:  3397
Record:  3398
Record:  3399
Record:  3400
Record:  3401
Record:  3402
Record:  3403
Record:  3404
Record:  3405
Record:  3406
Record:  3407
Record:  3408
Record:  3409
Record:  3410
Record:  3411
Record:  3412
Record:  3413
Record:  3414
Record:  3415
Record:  3416
Record:  3417
Record:  3418
Record:  3419
Record:  3420
Record:  3421
Record:  3422
Record:  3423
Record:  3424
Record:  3425
Record:  3426
Record:  3427
Record:  3428
Record:  3429
Record:  3430
Record:  3431
Record:  3432
Record:  3433
Record:  3434
Record:  3435
Record:  3436
Record:  3437
Record:  3438
Record:  3439
Record:  3440
Record:  3441
Record:  3442
Record:  3443
Record:  3444
Record:  3445
Record:  3446
Record:  3447
Record:  3448
Record:  3449
Record:  3450
Record:  3451
Record:  3452
Record:  3453
Record:  3454
Record:  3455
Record:  3456
Record:  3457
Record:  3458
Record:  3459
Record:  3460
Record:  3461
Record:  3462
Record:  3463
Record:  3464
Record:  3465
Record:  3466
Record:  3467
Record:  3468
Record:  3469
Record:  3470
Record:  3471
Record:  3472
Record:  3473
Record:  3474
Record:  3475
Record:  3476
Record:  3477
Record:  3478
Record:  3479
Record:  3480
Record:  3481
Record:  3482
Record:  3483
Record:  3484
Record:  3485
Record:  3486
Record:  3487
Record:  3488
Record:  3489
Record:  3490
Record:  3491
Record:  3492
Record:  3493
Record:  3494
Record:  3495
Record:  3496
Record:  3497
Record:  3498
Record:  3499
Record:  3500
Record:  3501
Record:  3502
Record:  3503
Record:  3504
Record:  3505
Record:  3506
Record:  3507
Record:  3508
Record:  3509
Record:  3510
Record:  3511
Record:  3512
Record:  3513
Record:  3514
Record:  3515
Record:  3516
Record:  3517
Record:  3518
Record:  3519
Record:  3520
Record:  3521
Record:  3522
Record:  3523
Record:  3524
Record:  3525
Record:  3526
Record:  3527
Record:  3528
Record:  3529
Record:  3530
Record:  3531
Record:  3532
Record:  3533
Record:  3534
Record:  3535
Record:  3536
Record:  3537
Record:  3538
Record:  3539
Record:  3540
Record:  3541
Record:  3542
Record:  3543
Record:  3544
Record:  3545
Record:  3546
Record:  3547
Record:  3548
Record:  3549
Record:  3550
Record:  3551
Record:  3552
Record:  3553
Record:  3554
Record:  3555
Record:  3556
Record:  3557
Record:  3558
Record:  3559
Record:  3560
Record:  3561
Record:  3562
Record:  3563
Record:  3564
Record:  3565
Record:  3566
Record:  3567
Record:  3568
Record:  3569
Record:  3570
Record:  3571
Record:  3572
Record:  3573
Record:  3574
Record:  3575
Record:  3576
Record:  3577
Record:  3578
Record:  3579
Record:  3580
Record:  3581
Record:  3582
Record:  3583
Record:  3584
Record:  3585
Record:  3586
Record:  3587
Record:  3588
Record:  3589
Record:  3590
Record:  3591
Record:  3592
Record:  3593
Record:  3594
Record:  3595
Record:  3596
Record:  3597
Record:  3598
Record:  3599
Record:  3600
Record:  3601
Record:  3602
Record:  3603
Record:  3604
Record:  3605
Record:  3606
Record:  3607
Record:  3608
Record:  3609
Record:  3610
Record:  3611
Record:  3612
Record:  3613
Record:  3614
Record:  3615
Record:  3616
Record:  3617
Record:  3618
Record:  3619
Record:  3620
Record:  3621
Record:  3622
Record:  3623
Record:  3624
Record:  3625
Record:  3626
Record:  3627
Record:  3628
Record:  3629
Record:  3630
Record:  3631
Record:  3632
Record:  3633
Record:  3634
Record:  3635
Record:  3636
Record:  3637
Record:  3638
Record:  3639
Record:  3640
Record:  3641
Record:  3642
Record:  3643
Record:  3644
Record:  3645
Record:  3646
Record:  3647
Record:  3648
Record:  3649
Record:  3650
Record:  3651
Record:  3652
Record:  3653
Record:  3654
Record:  3655
Record:  3656
Record:  3657
Record:  3658
Record:  3659
Record:  3660
Record:  3661
Record:  3662
Record:  3663
Record:  3664
Record:  3665
Record:  3666
Record:  3667

In [5]:
ser = df_surveys['title']
ser.name = 'ntitle'
df_surveys = pd.concat([df_surveys,ser],axis=1)
for index,row in df_surveys.iterrows():
    ntitle = norm_str(row['title'])
    #print(ntitle)
    df_surveys.loc[df_surveys['title'] == row['title'], 'ntitle'] = ntitle

#df_crosswalk.head()

ser = df_crosswalk['SIC Industry']
ser.name = 'ntitle'
df_crosswalk = pd.concat([df_crosswalk,ser],axis=1)
for index,row in df_crosswalk.iterrows():
    ntitle = norm_str(row['SIC Industry'])
    #print(ntitle)
    df_crosswalk.loc[df_crosswalk['SIC Industry'] == row['SIC Industry'], 'ntitle'] = ntitle
#df_crosswalk.head()

In [6]:
#df_surveys.head()
mask = ((df_surveys['sic'] == 'nan') & (df_surveys['naics'] == 'nan'))
df_surveys[mask]
nosic_nonaics = df_surveys[mask]
print(nosic_nonaics.shape)
nosic_nonaics.head()
matches = 0
for index,row in nosic_nonaics.iterrows():
    #nosic_nonaics.loc[nosic_nonaics['ntitle'] == row['ntitle'],'sic'] = get_sic(row,df_crosswalk)
    df_ret = get_sic(row,df_crosswalk)
    if df_ret.shape[0] > 0:
        sic = str(df_ret['SIC'])
        sic= sic[:sic.index('\n')]
        sic = sic[sic.rindex(' '):].strip()
        print(row.title,sic)
        nosic_nonaics.loc[nosic_nonaics['ntitle'] == row['ntitle'],'sic'] = sic


(1821, 6)
Books 2730
Footwear 5139
Book Publishing 2731
Car Washes 7542
Bowling Centers 7933
Bookstores 5942
Boat Dealers 5551
Engineering Services 8711
Department Stores 5311
Footwear 5139
Life Insurance 6311
Natural Gas: Distribution 4924
Colleges and Universities 8221
Florists 5992
Furniture Stores 5712
Title Insurance 6361
Highway and Street Construction 1611
Home Healthcare Services 8082
Kidney Dialysis Centers 8092
Legal Services 8111
Marinas 4493
Musical Instrument Stores 5736
Petroleum Refining 2910
Recreational Vehicle Dealers 5560
Shoe Stores 5660
Testing Laboratories 8734
Used Merchandise Stores 5932
Veterinary Services 740
Medical Equipment Rental 7352
Dental Laboratories 8072
/Users/jammerman/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/indexing.py:415: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s

In [7]:
mask = ((df_surveys['sic'] == 'nan') & (df_surveys['naics'] == 'nan'))
df_surveys[mask]
nosic_nonaics = df_surveys[mask]
print(nosic_nonaics.shape)
nosic_nonaics.head()


(1821, 6)
Out[7]:
sur_id title sic keyword naics ntitle
16 17 Adult Entertainment nan pornographic porn nan adultentertainment
17 18 Adult Obesity nan overweight over weight obese healthcare health... nan adultobesity
18 19 Advertising nan ads direct-mail marketing agencies services nan advertising
21 24 Advertising and Marketing nan advertising nan advertisingandmarketing
26 32 Aerospace/Defense nan defense aircraft military weapons missiles air... nan aerospacedefense

In [8]:
df_crosswalk.head()


Out[8]:
SIC SIC Industry NAICS NAICS Industry ntitle
0 3291 Abrasive products 327910 Abrasive Product Manufacturing abrasiveproducts
1 2891 Adhesives and sealants 325520 Adhesive Manufacturing adhesivesandsealants
2 9511 Air, water, & solid waste management 924110 Administration of Air and Water Resource and S... airwatersolidwastemanagement
3 9512 Land, mineral, wildlife conservation 924120 Administration of Conservation Programs landmineralwildlifeconservation
4 9410 Admin. of Educational Programs 923110 Administration of Education Programs adminofeducationalprograms

In [9]:
sics


Out[9]:
[]

In [10]:
df_surveys


Out[10]:
sur_id title sic keyword naics ntitle
0 1 Abrasive Products 3291 fabricated metals grinding industrial equipme... 332999 abrasiveproducts
1 2 AC Drives nan electronics industrial equipment electric pow... 221122 acdrives
2 3 AC Drives: Global nan electronics industrial equipment electric pow... 221122 acdrivesglobal
3 4 Accident and Health Insurance 6321 Health Medical Insurance Carriers carrier Emp... 524114, 525190, 524130 accidentandhealthinsurance
4 5 Accountancy nan accounting tax services accountants designing ... 541211 accountancy
5 6 Accountancy: Global nan accounting tax services accountants designing ... 541211 accountancyglobal
6 7 Accounting, Auditing, and Bookkeeping Services 8721 accountants cpas payroll service accounting bo... 541211, 541214, 541219 accountingauditingandbookkeepingservices
7 8 Adhesives and Sealants 2891 manufacturing industrial household glues caul... 325520 adhesivesandsealants
8 9 Adjustment and Collection Services 7322 collection agencies repossession services clai... 561440, 561491 adjustmentandcollectionservices
9 10 Administration of Educational Programs 9411 educational scholarships education central coo... 923110 administrationofeducationalprograms
10 11 Administration of General Economic Programs 9611 tourism data analysis analyses develop develo... 926110 administrationofgeneraleconomicprograms
11 12 Administration of Housing Programs 9531 authorities governments building standards d... 925110 administrationofhousingprograms
12 13 Administration of Public Health Programs 9431 governments mental cancer disease control mate... 923120 administrationofpublichealthprograms
13 14 Administration of Social, Human Resource, and ... 9441 elderly senior citizens child welfare children... 923130 administrationofsocialhumanresourceandincomema...
14 15 Administration of Urban Planning and Community... 9532 slum clearance development land renewal federa... 92512 administrationofurbanplanningandcommunityandru...
15 16 Administration of Veterans Affairs, Except Hea... 9451 services training counseling governments feder... 923140 administrationofveteransaffairsexcepthealthand...
16 17 Adult Entertainment nan pornographic porn nan adultentertainment
17 18 Adult Obesity nan overweight over weight obese healthcare health... nan adultobesity
18 19 Advertising nan ads direct-mail marketing agencies services nan advertising
19 21 Advertising Agencies 7311 writing copy artwork graphics creative work pe... 541810 advertisingagencies
20 441365 Photographic Equipment and Supplies 5043 wholesalers wholesaling distribution cameras ... 421410 photographicequipmentandsupplies
21 24 Advertising and Marketing nan advertising nan advertisingandmarketing
22 26 Advertising, Not Elsewhere Classified 7319 agency aerial circular handbill distribution d... 481219, 541830 , 541850 , 541870, 541890 advertisingnotelsewhereclassified
23 27 Advertising: Global nan agencies display services marketing 541850, 541891 advertisingglobal
24 28 Aerospace and Defense nan military airplanes aeronautics aerospace plane... 481211 aerospaceanddefense
25 31 Aerospace and Defense: Global nan military airplanes aeronautics aerospace plane... 481211 aerospaceanddefenseglobal
26 32 Aerospace/Defense nan defense aircraft military weapons missiles air... nan aerospacedefense
27 33 Agribusiness nan farms agriculture farming crops dairy organics... nan agribusiness
28 34 Agricultural Products nan farms agriculture farming cereals roots tuber... 424590, 493130 agriculturalproducts
29 35 Agricultural Products: Global nan farms agriculture farming cereals roots tuber... nan agriculturalproductsglobal
... ... ... ... ... ... ...
3764 443192 Private Firefighting Services nan firefighters nan privatefirefightingservices
3765 443193 Retail Store Fixture Dealers nan fixtures Storage racks retail stores nan retailstorefixturedealers
3766 443194 Tower Crane Manufacturing nan cranes towers high rises buildings nan towercranemanufacturing
3767 443195 Scaffolding Contractors nan construction remodeling painting nan scaffoldingcontractors
3768 443196 Dump Truck Services nan dump trucks nan dumptruckservices
3769 443197 Ornamental & Architectural Metalwork Services nan fences, handrails, balcony rails, screen doors... nan ornamentalarchitecturalmetalworkservices
3770 443198 Metal Door Manufacturing nan windows doors manufacturing screens moldings t... nan metaldoormanufacturing
3771 443199 Bridge & Structure Painting Contractors nan bridges coatings cables nan bridgestructurepaintingcontractors
3772 443200 Firestop Contractors nan firestops nan firestopcontractors
3773 443201 Concrete Reinforcing Bar Manufacturing nan rebar construction bridges buildings highways ... nan concretereinforcingbarmanufacturing
3774 443202 Reconstituted Wood Product Manufacturing nan NaN nan reconstitutedwoodproductmanufacturing
3775 443203 Wood Flooring Manufacturing nan homes floors coverings textiles household viny... nan woodflooringmanufacturing
3776 443204 Window Coverings Wholesaling nan curtains, blinds and draperies nan windowcoveringswholesaling
3777 443205 Corrosion Protection Engineering Services nan coatings nan corrosionprotectionengineeringservices
3778 443206 Cementing Oil & Gas Well Services nan wells nan cementingoilgaswellservices
3779 443207 Soundproofing Contractors nan NaN nan soundproofingcontractors
3780 443208 Water & Air Quality Testing Services nan Asbestos mold bacteria nan waterairqualitytestingservices
3781 443209 Commercial Property Remodeling nan remodels buildings nan commercialpropertyremodeling
3782 443210 Precision Agriculture Systems & Services nan agricultural harvesting crops farms farming nan precisionagriculturesystemsservices
3783 443211 3D Printer Manufacturing nan printing manufacturing three-dimensional prin... nan 3dprintermanufacturing
3784 443212 Night Vision Equipment Manufacturing nan NVD's binoculars goggles nan nightvisionequipmentmanufacturing
3785 443213 Headphone Manufacturing nan headphones earphones, earbuds nan headphonemanufacturing
3786 443214 LIDAR Device Manufacturing nan ranges ranging distances lasers nan lidardevicemanufacturing
3787 443215 Security Paper Printing nan watermarks watermarked ink nan securitypaperprinting
3788 443216 Uninterruptible Power Supply System Manufacturing nan blackouts nan uninterruptiblepowersupplysystemmanufacturing
3789 443217 Variable Message Signage Manufacturing nan signs nan variablemessagesignagemanufacturing
3790 443218 Carbon Monoxide Alarm Manufacturing nan alarms CO2 nan carbonmonoxidealarmmanufacturing
3791 443219 Fire & Smoke Alarm Manufacturing nan alarms nan firesmokealarmmanufacturing
3792 443220 Electronic Access Control System Manufacturing nan smart cards security door locks biometrics nan electronicaccesscontrolsystemmanufacturing
3793 443221 Activated Carbon Manufacturing nan purification nan activatedcarbonmanufacturing

3794 rows × 6 columns


In [11]:
records = pymarc.parse_xml_to_array(codecs.open('PardeeIS.xml','rb','utf-8'))

In [12]:
url_dict = {}
counter = 0
clientsibisworld = [] 
wwwibisworld = [] 
umi = []
lexisnexis = []
mintel = [] 
galegroup = []
ebscohost = [] 
mergentonline = [] 
doi = []
for rec in records:
    rec = marcx.FatRecord.from_record(rec)

    domain = rec['856']['u'][:rec['856']['u'].rindex('/')]
#    domain = domain[domain.index('.')+1:domain.index('.',domain.index('.')+1)]
    domain = domain[domain.index(':')+3:domain.index('.',domain.index('.')+1)] 
    #print(domain)
    if domain == 'www.ibisworld':
        rec.remove('856')
        rec.add('856', u='http://clients1.ibisworld.com/reports/us/industry/home.aspx',z='IBISWorld Industry Reports',n='IBISWorld',indicators='40')
        clientsibisworld.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'clients1.ibisworld':
        rec.remove('856')
        rec.add('856', u='http://clients1.ibisworld.com/reports/us/industry/home.aspx',z='IBISWorld Industry Reports',n='IBISWorld',indicators='40')
        clientsibisworld.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'clients.ibisworld':
        rec.remove('856')
        rec.add('856', u='http://clients1.ibisworld.com/reports/us/industry/home.aspx',z='IBISWorld Industry Reports',n='IBISWorld',indicators='40')
        clientsibisworld.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'www.lexisnexis':
        url = rec['856']['u'].replace('.ezproxy.bu.edu','')
        rec.remove('856')
        rec.add('856', u=url,z='Hoovers Industry Snapshots',n='Lexis Nexis',indicators='40')
        lexisnexis.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'proquest.umi':
        rec.remove('856')
        rec.add('856', u='http://search.proquest.com/browseterms/firstResearch_business?accountid=9676',z='First Research Reports',n='ABI-Inform',indicators='40')
        umi.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'academic.mintel':
        mintel.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'find.galegroup':
        url = rec['856']['u'].replace('.ezproxy.bu.edu','')
        _z = rec['856']['z']
        _n = rec['856']['n']
        rec.remove('856')
        rec.add('856', u=url,z=_z,n=_z,indicators='40')
        galegroup.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'search.ebscohost':
        url = rec['856']['u'].replace('.ezproxy.bu.edu','')
        _z = rec['856']['z']
        _n = rec['856']['n']
        rec.remove('856')
        rec.add('856', u=url,z=_z,n=_z,indicators='40')
        ebscohost.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'www.mergentonline':
        url = rec['856']['u'].replace('.ezproxy.bu.edu','')
        _z = rec['856']['z']
        _n = rec['856']['n']
        rec.remove('856')
        rec.add('856', u=url,z=_z,n=_z,indicators='40')
        mergentonline.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
    if domain == 'dx.doi':
        url = rec['856']['u'].replace('.ezproxy.bu.edu','')
        _z = rec['856']['z']
        _n = rec['856']['n']
        rec.remove('856')
        rec.add('856', u=url,z=_z,n=_z,indicators='40')
        doi.append(rec)
        if rec['856']['u'][:rec['856']['u'].rindex('/')] in url_dict:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] += 1
        else:
            url_dict[rec['856']['u'][:rec['856']['u'].rindex('/')]] = 1
                    
for k,v in url_dict.items():
    print(str(counter),k,str(v))
    counter += 1


0 http://www.lexisnexis.com/us/lnacademic/search 1
1 http://search.proquest.com/browseterms 326
2 http://find.galegroup.com/gvrl 996
3 http://www.lexisnexis.com/us/lnacademic/api/version1 127
4 http://clients1.ibisworld.com/reports/us/industry 1213
5 http://dx.doi.org/10.1787 8
6 http://www.mergentonline.com 25
7 http://academic.mintel.com 562
8 http://search.ebscohost.com 409

In [17]:
groups = [(clientsibisworld,'ibis.xml'),(umi,'umi.xml'),
          (lexisnexis,'lexisnexis.xml'),(mintel,'mintel.xml'),
          (galegroup,'galegroup.xml'),(ebscohost,'ebscohost.xml'),
          (mergentonline,'mergentonline.xml'),(doi,'doi.xml'),]
for group in groups:
    print()
    file_name = group[1]
    out = codecs.open(file_name,'w','utf-8')
    print(file_name)
    records = group[0]  
    out.write(header)
    for rec in records:
        out.write(pymarc.record_to_xml(rec ).decode("utf-8"))
        #print(rec)
    out.write(footer)
    out.close()


ibis.xml

umi.xml

lexisnexis.xml

mintel.xml

galegroup.xml

ebscohost.xml

mergentonline.xml

doi.xml

In [ ]: