In [ ]:
import pandas as pd
import pyodbc
#run this: pip install git+https://github.com/pandas-profiling/pandas-profiling.git
import pandas_profiling
import re
import os
connection_string = "mssql+pyodbc://SERVER\INSTANCE/DATABASE?driver=SQL Server Native Client 11.0"
table_schema='dbo'
table_name = 'TABLE_NAME'
output_file = os.getcwd() + re.sub(r'\W+', '', table_name) + '.html'
In [ ]:
df_date_cols = pd.read_sql_query(
sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where TABLE_NAME='"+table_name+"' and TABLE_SCHEMA='"+table_schema+"' and data_type like 'date%'",
con = connection_string
)
table_date_columns=df_date_cols['column_name'].tolist()
In [ ]:
df = pd.read_sql_query(
sql = "SELECT * FROM " + table_name + " WHERE 1;",
con = connection_string,
coerce_float=True,
parse_dates = table_date_columns
)
df.head().T
In [ ]:
profile = pandas_profiling.ProfileReport(df)
res = profile.to_file(outputfile=output_file)
profile
In [ ]: