In [1]:
    
!pip install pandas-profiling
    
    
Requirement already satisfied: pandas-profiling in /Users/peta/anaconda3/lib/python3.6/site-packages
Requirement already satisfied: six>=1.9 in /Users/peta/anaconda3/lib/python3.6/site-packages (from pandas-profiling)
Requirement already satisfied: matplotlib>=1.4 in /Users/peta/anaconda3/lib/python3.6/site-packages (from pandas-profiling)
Requirement already satisfied: jinja2>=2.8 in /Users/peta/anaconda3/lib/python3.6/site-packages (from pandas-profiling)
Requirement already satisfied: pandas>=0.19 in /Users/peta/anaconda3/lib/python3.6/site-packages (from pandas-profiling)
Requirement already satisfied: numpy>=1.7.1 in /Users/peta/anaconda3/lib/python3.6/site-packages (from matplotlib>=1.4->pandas-profiling)
Requirement already satisfied: python-dateutil in /Users/peta/anaconda3/lib/python3.6/site-packages (from matplotlib>=1.4->pandas-profiling)
Requirement already satisfied: pytz in /Users/peta/anaconda3/lib/python3.6/site-packages (from matplotlib>=1.4->pandas-profiling)
Requirement already satisfied: cycler>=0.10 in /Users/peta/anaconda3/lib/python3.6/site-packages (from matplotlib>=1.4->pandas-profiling)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=1.5.6 in /Users/peta/anaconda3/lib/python3.6/site-packages (from matplotlib>=1.4->pandas-profiling)
Requirement already satisfied: MarkupSafe>=0.23 in /Users/peta/anaconda3/lib/python3.6/site-packages (from jinja2>=2.8->pandas-profiling)
In [2]:
    
import pandas as pd
import pandas_profiling
    
In [3]:
    
df = pd.read_csv("diabetesqld-AgegroupRegistration-171020.csv", parse_dates=['Registration dates'], encoding='UTF-8')
    
In [4]:
    
pandas_profiling.ProfileReport(df)
    
    Out[4]:
    
        Overview
    
    
    
        Dataset info
        
            
            
                Number of variables 
                12  
             
            
                Number of observations 
                363  
             
            
                Total Missing (%) 
                22.6%  
             
            
                Total size in memory 
                34.1 KiB  
             
            
                Average record size in memory 
                96.2 B  
             
            
        
    
    
        Variables types
        
            
            
                Numeric 
                0  
             
            
                Categorical 
                11  
             
            
                Date 
                1  
             
            
                Text (Unique) 
                0  
             
            
                Rejected 
                0  
             
            
        
    
    
        Warnings
        00-15 has 331 / 91.2% missing values Missing16-20 has 288 / 79.3% missing values Missing21-29 has 188 / 51.8% missing values Missing30-39 has 112 / 30.9% missing values Missing30-39 has a high cardinality: 62 distinct values  Warning40-49 has 44 / 12.1% missing values Missing40-49 has a high cardinality: 116 distinct values  Warning50-59 has 15 / 4.1% missing values Missing50-59 has a high cardinality: 176 distinct values  Warning60-69 has a high cardinality: 211 distinct values  Warning70-79 has a high cardinality: 180 distinct values  Warning80-89 has a high cardinality: 110 distinct values  Warning90+ has 5 / 1.4% missing values MissingGrand Total has a high cardinality: 301 distinct values  Warning 
    
    
        Variables
    
    
    
        00-15
            Categorical
        
    
    
        
            Distinct count 
            2 
         
        
            Unique (%) 
            6.2% 
         
        
            Missing (%) 
            91.2% 
         
        
            Missing (n) 
            331 
         
    
    
        
    . 
    
        
             
        
        32
     
 
    (Missing) 
    
        
            331
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        32 
        8.8% 
        
             
         
 
        (Missing) 
        331 
        91.2% 
        
             
         
 
    
        16-20
            Categorical
        
    
    
        
            Distinct count 
            2 
         
        
            Unique (%) 
            2.7% 
         
        
            Missing (%) 
            79.3% 
         
        
            Missing (n) 
            288 
         
    
    
        
    . 
    
        
            75
        
        
     
 
    (Missing) 
    
        
            288
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        75 
        20.7% 
        
             
         
 
        (Missing) 
        288 
        79.3% 
        
             
         
 
    
        21-29
            Categorical
        
    
    
        
            Distinct count 
            16 
         
        
            Unique (%) 
            9.1% 
         
        
            Missing (%) 
            51.8% 
         
        
            Missing (n) 
            188 
         
    
    
        
    . 
    
        
            131
        
        
     
 
    11 
    
        
             
        
        9
     
 
    12 
    
        
             
        
        7
     
 
    Other values (12) 
    
        
             
        
        28
     
 
    (Missing) 
    
        
            188
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        131 
        36.1% 
        
             
         
 
        11 
        9 
        2.5% 
        
             
         
 
        12 
        7 
        1.9% 
        
             
         
 
        15 
        5 
        1.4% 
        
             
         
 
        16 
        4 
        1.1% 
        
             
         
 
        17 
        4 
        1.1% 
        
             
         
 
        25 
        3 
        0.8% 
        
             
         
 
        24 
        2 
        0.6% 
        
             
         
 
        13 
        2 
        0.6% 
        
             
         
 
        22 
        2 
        0.6% 
        
             
         
 
        Other values (5) 
        6 
        1.7% 
        
             
         
 
        (Missing) 
        188 
        51.8% 
        
             
         
 
    
        30-39
            Categorical
        
    
    
        
            Distinct count 
            62 
         
        
            Unique (%) 
            24.7% 
         
        
            Missing (%) 
            30.9% 
         
        
            Missing (n) 
            112 
         
    
    
        
    . 
    
        
            111
        
        
     
 
    11 
    
        
             
        
        7
     
 
    51 
    
        
             
        
        6
     
 
    Other values (58) 
    
        
            127
        
        
     
 
    (Missing) 
    
        
            112
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        111 
        30.6% 
        
             
         
 
        11 
        7 
        1.9% 
        
             
         
 
        51 
        6 
        1.7% 
        
             
         
 
        15 
        6 
        1.7% 
        
             
         
 
        23 
        6 
        1.7% 
        
             
         
 
        14 
        5 
        1.4% 
        
             
         
 
        50 
        5 
        1.4% 
        
             
         
 
        66 
        4 
        1.1% 
        
             
         
 
        21 
        4 
        1.1% 
        
             
         
 
        36 
        4 
        1.1% 
        
             
         
 
        Other values (51) 
        93 
        25.6% 
        
             
         
 
        (Missing) 
        112 
        30.9% 
        
             
         
 
    
        40-49
            Categorical
        
    
    
        
            Distinct count 
            116 
         
        
            Unique (%) 
            36.4% 
         
        
            Missing (%) 
            12.1% 
         
        
            Missing (n) 
            44 
         
    
    
        
    . 
    
        
            56
        
        
     
 
    11 
    
        
             
        
        11
     
 
    18 
    
        
             
        
        8
     
 
    Other values (112) 
    
        
            244
        
        
     
 
    (Missing) 
    
        
             
        
        44
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        56 
        15.4% 
        
             
         
 
        11 
        11 
        3.0% 
        
             
         
 
        18 
        8 
        2.2% 
        
             
         
 
        14 
        7 
        1.9% 
        
             
         
 
        17 
        7 
        1.9% 
        
             
         
 
        16 
        6 
        1.7% 
        
             
         
 
        15 
        6 
        1.7% 
        
             
         
 
        33 
        6 
        1.7% 
        
             
         
 
        26 
        5 
        1.4% 
        
             
         
 
        12 
        5 
        1.4% 
        
             
         
 
        Other values (105) 
        202 
        55.6% 
        
             
         
 
        (Missing) 
        44 
        12.1% 
        
             
         
 
    
        50-59
            Categorical
        
    
    
        
            Distinct count 
            176 
         
        
            Unique (%) 
            50.6% 
         
        
            Missing (%) 
            4.1% 
         
        
            Missing (n) 
            15 
         
    
    
        
    . 
    
        
             
        
        53
     
 
    78 
    
        
             
        
        8
     
 
    30 
    
        
             
        
        6
     
 
    Other values (172) 
    
        
            281
        
        
     
 
    (Missing) 
    
        
             
        
        15
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        53 
        14.6% 
        
             
         
 
        78 
        8 
        2.2% 
        
             
         
 
        30 
        6 
        1.7% 
        
             
         
 
        33 
        5 
        1.4% 
        
             
         
 
        32 
        5 
        1.4% 
        
             
         
 
        35 
        5 
        1.4% 
        
             
         
 
        38 
        4 
        1.1% 
        
             
         
 
        106 
        4 
        1.1% 
        
             
         
 
        264 
        4 
        1.1% 
        
             
         
 
        28 
        4 
        1.1% 
        
             
         
 
        Other values (165) 
        250 
        68.9% 
        
             
         
 
        (Missing) 
        15 
        4.1% 
        
             
         
 
    
        60-69
            Categorical
        
    
    
        
            Distinct count 
            211 
         
        
            Unique (%) 
            58.4% 
         
        
            Missing (%) 
            0.6% 
         
        
            Missing (n) 
            2 
         
    
    
        
    . 
    
        
             
        
        46
     
 
    14 
    
        
             
        
        6
     
 
    35 
    
        
             
        
        5
     
 
    Other values (207) 
    
        
            304
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        46 
        12.7% 
        
             
         
 
        14 
        6 
        1.7% 
        
             
         
 
        35 
        5 
        1.4% 
        
             
         
 
        248 
        5 
        1.4% 
        
             
         
 
        12 
        4 
        1.1% 
        
             
         
 
        173 
        4 
        1.1% 
        
             
         
 
        27 
        4 
        1.1% 
        
             
         
 
        63 
        3 
        0.8% 
        
             
         
 
        15 
        3 
        0.8% 
        
             
         
 
        315 
        3 
        0.8% 
        
             
         
 
        Other values (200) 
        278 
        76.6% 
        
             
         
 
    
        70-79
            Categorical
        
    
    
        
            Distinct count 
            180 
         
        
            Unique (%) 
            49.6% 
         
        
            Missing (%) 
            0.0% 
         
        
            Missing (n) 
            0 
         
    
    
        
    . 
    
        
             
        
        36
     
 
    187 
    
        
             
        
        7
     
 
    232 
    
        
             
        
        5
     
 
    Other values (177) 
    
        
            315
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        36 
        9.9% 
        
             
         
 
        187 
        7 
        1.9% 
        
             
         
 
        232 
        5 
        1.4% 
        
             
         
 
        11 
        5 
        1.4% 
        
             
         
 
        212 
        5 
        1.4% 
        
             
         
 
        36 
        5 
        1.4% 
        
             
         
 
        44 
        4 
        1.1% 
        
             
         
 
        25 
        4 
        1.1% 
        
             
         
 
        205 
        4 
        1.1% 
        
             
         
 
        219 
        4 
        1.1% 
        
             
         
 
        Other values (170) 
        284 
        78.2% 
        
             
         
 
    
        80-89
            Categorical
        
    
    
        
            Distinct count 
            110 
         
        
            Unique (%) 
            30.3% 
         
        
            Missing (%) 
            0.0% 
         
        
            Missing (n) 
            0 
         
    
    
        
    . 
    
        
             
        
        44
     
 
    88 
    
        
             
        
        9
     
 
    106 
    
        
             
        
        8
     
 
    Other values (107) 
    
        
            302
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        44 
        12.1% 
        
             
         
 
        88 
        9 
        2.5% 
        
             
         
 
        106 
        8 
        2.2% 
        
             
         
 
        22 
        8 
        2.2% 
        
             
         
 
        96 
        7 
        1.9% 
        
             
         
 
        109 
        6 
        1.7% 
        
             
         
 
        97 
        6 
        1.7% 
        
             
         
 
        23 
        6 
        1.7% 
        
             
         
 
        58 
        6 
        1.7% 
        
             
         
 
        83 
        6 
        1.7% 
        
             
         
 
        Other values (100) 
        257 
        70.8% 
        
             
         
 
    
        90+
            Categorical
        
    
    
        
            Distinct count 
            23 
         
        
            Unique (%) 
            6.4% 
         
        
            Missing (%) 
            1.4% 
         
        
            Missing (n) 
            5 
         
    
    
        
    . 
    
        
            149
        
        
     
 
    12 
    
        
             
        
        28
     
 
    11 
    
        
             
        
        25
     
 
    Other values (19) 
    
        
            156
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        . 
        149 
        41.0% 
        
             
         
 
        12 
        28 
        7.7% 
        
             
         
 
        11 
        25 
        6.9% 
        
             
         
 
        14 
        23 
        6.3% 
        
             
         
 
        13 
        20 
        5.5% 
        
             
         
 
        16 
        19 
        5.2% 
        
             
         
 
        15 
        14 
        3.9% 
        
             
         
 
        19 
        13 
        3.6% 
        
             
         
 
        17 
        12 
        3.3% 
        
             
         
 
        20 
        12 
        3.3% 
        
             
         
 
        Other values (12) 
        43 
        11.8% 
        
             
         
 
    
        Grand Total
            Categorical
        
    
    
        
            Distinct count 
            301 
         
        
            Unique (%) 
            82.9% 
         
        
            Missing (%) 
            0.0% 
         
        
            Missing (n) 
            0 
         
    
    
        
    15 
    
        
             
        
        6
     
 
    14 
    
        
             
        
        5
     
 
    . 
    
        
             
        
        4
     
 
    Other values (298) 
    
        
            348
        
        
     
 
    
    
    
    
        Value 
        Count 
        Frequency (%) 
          
     
    
    
        15 
        6 
        1.7% 
        
             
         
 
        14 
        5 
        1.4% 
        
             
         
 
        . 
        4 
        1.1% 
        
             
         
 
        13 
        4 
        1.1% 
        
             
         
 
        22 
        4 
        1.1% 
        
             
         
 
        253 
        3 
        0.8% 
        
             
         
 
        475 
        3 
        0.8% 
        
             
         
 
        205 
        3 
        0.8% 
        
             
         
 
        548 
        3 
        0.8% 
        
             
         
 
        11 
        3 
        0.8% 
        
             
         
 
        Other values (291) 
        325 
        89.5% 
        
             
         
 
    
        Registration dates
            Date
        
    
    
        
            
                
                    Distinct count 
                    363 
                 
                
                    Unique (%) 
                    100.0% 
                 
                
                    Missing (%) 
                    0.0% 
                 
                
                    Missing (n) 
                    0 
                 
                
                    Infinite (%) 
                    0.0% 
                 
                
                    Infinite (n) 
                    0 
                 
            
        
        
            
                
                    Minimum 
                    1987-08-01 00:00:00 
                 
                
                    Maximum 
                    2017-10-01 00:00:00 
                 
            
        
    
    
 
    
 
    
        Sample
    
    
    
        
  
    
       
      Registration dates 
      00-15 
      16-20 
      21-29 
      30-39 
      40-49 
      50-59 
      60-69 
      70-79 
      80-89 
      90+ 
      Grand Total 
     
  
  
    
      0 
      1988-04-01 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      . 
      . 
      . 
      . 
      13 
     
    
      1 
      1989-04-01 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      . 
      . 
      . 
      . 
      14 
     
    
      2 
      1990-04-01 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      . 
      . 
      . 
      . 
      . 
      22 
     
    
      3 
      1991-04-01 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      . 
      . 
      15 
      12 
      . 
      38 
     
    
      4 
      1992-04-01 
      NaN 
      NaN 
      NaN 
      NaN 
      . 
      . 
      20 
      21 
      21 
      12 
      78 
     
  
    
In [ ]:
    
pfr = pandas_profiling.ProfileReport(df)
pfr.to_file("diabetesqld-agegroupregistration-profile.html")
    
In [ ]:
    
#### Print existing ProfileReport object inline
pfr
    
Content source: bigdatabigheart/Datasets
Similar notebooks: