notebook.community

Edit and run



In [1]:

    
import pandas



In [2]:

    
print dir(pandas)









    



['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset', 'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Expr', 'Float64Index', 'Grouper', 'HDFStore', 'Index', 'IndexSlice', 'Int64Index', 'Interval', 'IntervalIndex', 'MultiIndex', 'NaT', 'Panel', 'Panel4D', 'Period', 'PeriodIndex', 'RangeIndex', 'Series', 'SparseArray', 'SparseDataFrame', 'SparseList', 'SparseSeries', 'Term', 'TimeGrouper', 'Timedelta', 'TimedeltaIndex', 'Timestamp', 'UInt64Index', 'WidePanel', '_DeprecatedModule', '__builtins__', '__doc__', '__docformat__', '__file__', '__name__', '__package__', '__path__', '__version__', '_hashtable', '_lib', '_libs', '_np_version_under1p10', '_np_version_under1p11', '_np_version_under1p12', '_np_version_under1p13', '_np_version_under1p14', '_np_version_under1p15', '_tslib', '_version', 'api', 'bdate_range', 'compat', 'concat', 'core', 'crosstab', 'cut', 'date_range', 'datetime', 'datetools', 'describe_option', 'errors', 'eval', 'ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', 'ewmvol', 'expanding_apply', 'expanding_corr', 'expanding_count', 'expanding_cov', 'expanding_kurt', 'expanding_max', 'expanding_mean', 'expanding_median', 'expanding_min', 'expanding_quantile', 'expanding_skew', 'expanding_std', 'expanding_sum', 'expanding_var', 'factorize', 'get_dummies', 'get_option', 'get_store', 'groupby', 'infer_freq', 'interval_range', 'io', 'isna', 'isnull', 'json', 'lib', 'lreshape', 'match', 'melt', 'merge', 'merge_asof', 'merge_ordered', 'notna', 'notnull', 'np', 'offsets', 'option_context', 'options', 'ordered_merge', 'pandas', 'parser', 'period_range', 'pivot', 'pivot_table', 'plot_params', 'plotting', 'pnow', 'qcut', 'read_clipboard', 'read_csv', 'read_excel', 'read_feather', 'read_fwf', 'read_gbq', 'read_hdf', 'read_html', 'read_json', 'read_msgpack', 'read_parquet', 'read_pickle', 'read_sas', 'read_sql', 'read_sql_query', 'read_sql_table', 'read_stata', 'read_table', 'reset_option', 'rolling_apply', 'rolling_corr', 'rolling_count', 'rolling_cov', 'rolling_kurt', 'rolling_max', 'rolling_mean', 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', 'rolling_var', 'rolling_window', 'scatter_matrix', 'set_eng_float_format', 'set_option', 'show_versions', 'stats', 'test', 'testing', 'timedelta_range', 'to_datetime', 'to_msgpack', 'to_numeric', 'to_pickle', 'to_timedelta', 'tools', 'tseries', 'tslib', 'unique', 'util', 'value_counts', 'wide_to_long']



In [14]:

    
pd = pandas.DataFrame([[1,2,3], [4,5,6,7]])



In [15]:

    
pd



In [18]:

    
a = {'a':"python", "b":"Java", "c":"Jenkins"}
b = {'a':"python", "b":".net"}
pd = pandas.DataFrame([a,b])



In [19]:

    
pd









    Out[19]:







  
    
      
      a
      b
      c
    
  
  
    
      0
      python
      Java
      Jenkins
    
    
      1
      python
      .net
      NaN



In [20]:

    
pd = pandas.DataFrame([[1,2,3], [4,5,6,7]])



In [21]:

    
pd



In [26]:

    
x = pandas.DataFrame([[1,2,3], [4,5,6,7]], columns=['fir', 'sec', 'thi', 'four'])



In [27]:

    
x



In [30]:

    
x = pandas.DataFrame([[1,2,3], [4,5,6,7]], columns=['fir', 'sec', 'thi', 'four'], index=['row1', 'row2'])



In [31]:

    
x



In [32]:

    
type(x)









    Out[32]:





pandas.core.frame.DataFrame



In [33]:

    
print dir(x)









    



['T', '_AXIS_ALIASES', '_AXIS_IALIASES', '_AXIS_LEN', '_AXIS_NAMES', '_AXIS_NUMBERS', '_AXIS_ORDERS', '_AXIS_REVERSED', '_AXIS_SLICEMAP', '__abs__', '__add__', '__and__', '__array__', '__array_wrap__', '__bool__', '__bytes__', '__class__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__doc__', '__eq__', '__finalize__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lt__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rand__', '__rdiv__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__', '__rsub__', '__rtruediv__', '__rxor__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__', '__unicode__', '__weakref__', '__xor__', '_accessors', '_add_numeric_operations', '_add_series_only_operations', '_add_series_or_dataframe_operations', '_agg_by_level', '_agg_doc', '_aggregate', '_aggregate_multiple_funcs', '_align_frame', '_align_series', '_apply_broadcast', '_apply_empty_result', '_apply_raw', '_apply_standard', '_at', '_box_col_values', '_box_item_values', '_builtin_table', '_check_inplace_setting', '_check_is_chained_assignment_possible', '_check_percentile', '_check_setitem_copy', '_clear_item_cache', '_clip_with_one_bound', '_clip_with_scalar', '_combine_const', '_combine_frame', '_combine_match_columns', '_combine_match_index', '_combine_series', '_combine_series_infer', '_compare_frame', '_compare_frame_evaluate', '_consolidate', '_consolidate_inplace', '_construct_axes_dict', '_construct_axes_dict_for_slice', '_construct_axes_dict_from', '_construct_axes_from_arguments', '_constructor', '_constructor_expanddim', '_constructor_sliced', '_convert', '_count_level', '_create_indexer', '_cython_table', '_deprecations', '_dir_additions', '_dir_deletions', '_drop_axis', '_ensure_valid_index', '_expand_axes', '_flex_compare_frame', '_from_arrays', '_from_axes', '_get_agg_axis', '_get_axis', '_get_axis_name', '_get_axis_number', '_get_axis_resolvers', '_get_block_manager_axis', '_get_bool_data', '_get_cacher', '_get_index_resolvers', '_get_item_cache', '_get_numeric_data', '_get_valid_indices', '_get_value', '_get_values', '_getitem_array', '_getitem_column', '_getitem_frame', '_getitem_multilevel', '_getitem_slice', '_gotitem', '_iat', '_iget_item_cache', '_iloc', '_indexed_same', '_info_axis', '_info_axis_name', '_info_axis_number', '_info_repr', '_init_dict', '_init_mgr', '_init_ndarray', '_internal_names', '_internal_names_set', '_is_builtin_func', '_is_cached', '_is_cython_func', '_is_datelike_mixed_type', '_is_mixed_type', '_is_numeric_mixed_type', '_is_view', '_ix', '_ixs', '_join_compat', '_loc', '_maybe_cache_changed', '_maybe_update_cacher', '_metadata', '_needs_reindex_multi', '_obj_with_exclusions', '_protect_consolidate', '_reduce', '_reindex_axes', '_reindex_axis', '_reindex_columns', '_reindex_index', '_reindex_multi', '_reindex_with_indexers', '_repr_data_resource_', '_repr_fits_horizontal_', '_repr_fits_vertical_', '_repr_html_', '_repr_latex_', '_reset_cache', '_reset_cacher', '_sanitize_column', '_selected_obj', '_selection', '_selection_list', '_selection_name', '_series', '_set_as_cached', '_set_axis', '_set_axis_name', '_set_is_copy', '_set_item', '_set_value', '_setitem_array', '_setitem_frame', '_setitem_slice', '_setup_axes', '_shallow_copy', '_slice', '_stat_axis', '_stat_axis_name', '_stat_axis_number', '_take', '_to_dict_of_blocks', '_try_aggregate_string_function', '_typ', '_unpickle_frame_compat', '_unpickle_matrix_compat', '_update_inplace', '_validate_dtype', '_values', '_where', '_xs', 'abs', 'add', 'add_prefix', 'add_suffix', 'agg', 'aggregate', 'align', 'all', 'any', 'append', 'apply', 'applymap', 'as_matrix', 'asfreq', 'asof', 'assign', 'astype', 'at', 'at_time', 'axes', 'between_time', 'bfill', 'bool', 'boxplot', 'clip', 'clip_lower', 'clip_upper', 'columns', 'combine', 'combine_first', 'compound', 'copy', 'corr', 'corrwith', 'count', 'cov', 'cummax', 'cummin', 'cumprod', 'cumsum', 'describe', 'diff', 'div', 'divide', 'dot', 'drop', 'drop_duplicates', 'dropna', 'dtypes', 'duplicated', 'empty', 'eq', 'equals', 'eval', 'ewm', 'expanding', 'ffill', 'fillna', 'filter', 'fir', 'first', 'first_valid_index', 'floordiv', 'four', 'from_dict', 'from_items', 'from_records', 'ftypes', 'ge', 'get', 'get_dtype_counts', 'get_ftype_counts', 'get_values', 'groupby', 'gt', 'head', 'hist', 'iat', 'idxmax', 'idxmin', 'iloc', 'index', 'infer_objects', 'info', 'insert', 'interpolate', 'is_copy', 'isin', 'isna', 'isnull', 'items', 'iteritems', 'iterrows', 'itertuples', 'ix', 'join', 'keys', 'kurt', 'kurtosis', 'last', 'last_valid_index', 'le', 'loc', 'lookup', 'lt', 'mad', 'mask', 'max', 'mean', 'median', 'melt', 'memory_usage', 'merge', 'min', 'mod', 'mode', 'mul', 'multiply', 'ndim', 'ne', 'nlargest', 'notna', 'notnull', 'nsmallest', 'nunique', 'pct_change', 'pipe', 'pivot', 'pivot_table', 'plot', 'pop', 'pow', 'prod', 'product', 'quantile', 'query', 'radd', 'rank', 'rdiv', 'reindex', 'reindex_axis', 'reindex_like', 'rename', 'rename_axis', 'reorder_levels', 'replace', 'resample', 'reset_index', 'rfloordiv', 'rmod', 'rmul', 'rolling', 'round', 'rpow', 'rsub', 'rtruediv', 'sample', 'sec', 'select', 'select_dtypes', 'sem', 'set_axis', 'set_index', 'shape', 'shift', 'size', 'skew', 'slice_shift', 'sort_index', 'sort_values', 'squeeze', 'stack', 'std', 'style', 'sub', 'subtract', 'sum', 'swapaxes', 'swaplevel', 'tail', 'take', 'thi', 'to_clipboard', 'to_csv', 'to_dense', 'to_dict', 'to_excel', 'to_feather', 'to_gbq', 'to_hdf', 'to_html', 'to_json', 'to_latex', 'to_msgpack', 'to_panel', 'to_parquet', 'to_period', 'to_pickle', 'to_records', 'to_sparse', 'to_sql', 'to_stata', 'to_string', 'to_timestamp', 'to_xarray', 'transform', 'transpose', 'truediv', 'truncate', 'tshift', 'tz_convert', 'tz_localize', 'unstack', 'update', 'values', 'var', 'where', 'xs']



In [35]:

    
x.shape









    Out[35]:





(2, 4)



In [36]:

    
x.transpose()



In [37]:

    
x[]



In [38]:

    
x









    



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-38-2f755f117ac9> in <module>()
----> 1 x[0]

c:\python27\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
   2137             return self._getitem_multilevel(key)
   2138         else:
-> 2139             return self._getitem_column(key)
   2140 
   2141     def _getitem_column(self, key):

c:\python27\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
   2144         # get column
   2145         if self.columns.is_unique:
-> 2146             return self._get_item_cache(key)
   2147 
   2148         # duplicate columns & possible reduce dimensionality

c:\python27\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
   1840         res = cache.get(item)
   1841         if res is None:
-> 1842             values = self._data.get(item)
   1843             res = self._box_item_values(item, values)
   1844             cache[item] = res

c:\python27\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
   3836 
   3837             if not isna(item):
-> 3838                 loc = self.items.get_loc(item)
   3839             else:
   3840                 indexer = np.arange(len(self.items))[isna(self.items)]

c:\python27\lib\site-packages\pandas\core\indexes\base.pyc in get_loc(self, key, method, tolerance)
   2522                 return self._engine.get_loc(key)
   2523             except KeyError:
-> 2524                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2525 
   2526         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 0



In [39]:

    
x



In [41]:

    
x.drop('row2')



In [44]:

    
print str(pandas.read_excel.__doc__)









    



Read an Excel table into a pandas DataFrame

Parameters
----------
io : string, path object (pathlib.Path or py._path.local.LocalPath),
    file-like object, pandas ExcelFile, or xlrd workbook.
    The string could be a URL. Valid URL schemes include http, ftp, s3,
    and file. For file URLs, a host is expected. For instance, a local
    file could be file://localhost/path/to/workbook.xlsx
sheet_name : string, int, mixed list of strings/ints, or None, default 0

    Strings are used for sheet names, Integers are used in zero-indexed
    sheet positions.

    Lists of strings/integers are used to request multiple sheets.

    Specify None to get all sheets.

    str|int -> DataFrame is returned.
    list|None -> Dict of DataFrames is returned, with keys representing
    sheets.

    Available Cases

    * Defaults to 0 -> 1st sheet as a DataFrame
    * 1 -> 2nd sheet as a DataFrame
    * "Sheet1" -> 1st sheet as a DataFrame
    * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames
    * None -> All sheets as a dictionary of DataFrames

sheetname : string, int, mixed list of strings/ints, or None, default 0
    .. deprecated:: 0.21.0
       Use `sheet_name` instead

header : int, list of ints, default 0
    Row (0-indexed) to use for the column labels of the parsed
    DataFrame. If a list of integers is passed those row positions will
    be combined into a ``MultiIndex``. Use None if there is no header.
skiprows : list-like
    Rows to skip at the beginning (0-indexed)
skip_footer : int, default 0
    Rows at the end to skip (0-indexed)
index_col : int, list of ints, default None
    Column (0-indexed) to use as the row labels of the DataFrame.
    Pass None if there is no such column.  If a list is passed,
    those columns will be combined into a ``MultiIndex``.  If a
    subset of data is selected with ``usecols``, index_col
    is based on the subset.
names : array-like, default None
    List of column names to use. If file contains no header row,
    then you should explicitly pass header=None
converters : dict, default None
    Dict of functions for converting values in certain columns. Keys can
    either be integers or column labels, values are functions that take one
    input argument, the Excel cell content, and return the transformed
    content.
dtype : Type name or dict of column -> type, default None
    Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
    Use `object` to preserve data as stored in Excel and not interpret dtype.
    If converters are specified, they will be applied INSTEAD
    of dtype conversion.

    .. versionadded:: 0.20.0

true_values : list, default None
    Values to consider as True

    .. versionadded:: 0.19.0

false_values : list, default None
    Values to consider as False

    .. versionadded:: 0.19.0

parse_cols : int or list, default None
    .. deprecated:: 0.21.0
       Pass in `usecols` instead.

usecols : int or list, default None
    * If None then parse all columns,
    * If int then indicates last column to be parsed
    * If list of ints then indicates list of column numbers to be parsed
    * If string then indicates comma separated list of Excel column letters and
      column ranges (e.g. "A:E" or "A,C,E:F").  Ranges are inclusive of
      both sides.
squeeze : boolean, default False
    If the parsed data only contains one column then return a Series
na_values : scalar, str, list-like, or dict, default None
    Additional strings to recognize as NA/NaN. If dict passed, specific
    per-column NA values. By default the following values are interpreted
    as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
'1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'.
thousands : str, default None
    Thousands separator for parsing string columns to numeric.  Note that
    this parameter is only necessary for columns stored as TEXT in Excel,
    any numeric columns will automatically be parsed, regardless of display
    format.
keep_default_na : bool, default True
    If na_values are specified and keep_default_na is False the default NaN
    values are overridden, otherwise they're appended to.
verbose : boolean, default False
    Indicate number of NA values placed in non-numeric columns
engine: string, default None
    If io is not a buffer or path, this must be set to identify io.
    Acceptable values are None or xlrd
convert_float : boolean, default True
    convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
    data will be read in as floats: Excel stores all numbers as floats
    internally

Returns
-------
parsed : DataFrame or Dict of DataFrames
    DataFrame from the passed in Excel file.  See notes in sheet_name
    argument for more information on when a Dict of Dataframes is returned.



In [50]:

    
x = pandas.read_excel('excel.xlsx', sheetname=0)



In [51]:

    
x









    Out[51]:







  
    
      
      sub
      time
      batch
      location
      faculty
    
  
  
    
      0
      python
      08:30:00
      1
      hyderabad
      Nag
    
    
      1
      python
      08:30:00
      2
      hyderabad
      Nag
    
    
      2
      python
      08:30:00
      1
      Chennai
      Nag
    
    
      3
      AWS
      07:30:00
      1
      hyderabad
      xxxxx



In [52]:

    
x=x.transpose()



In [55]:

    
x









    Out[55]:







  
    
      
      0
      1
      2
      3
    
  
  
    
      sub
      python
      python
      python
      AWS
    
    
      time
      08:30:00
      08:30:00
      08:30:00
      07:30:00
    
    
      batch
      1
      2
      1
      1
    
    
      location
      hyderabad
      hyderabad
      Chennai
      hyderabad
    
    
      faculty
      Nag
      Nag
      Nag
      xxxxx



In [56]:

    
x['ID']=[i+1 for i in xrange(len(x.index))]



In [57]:

    
x









    Out[57]:







  
    
      
      0
      1
      2
      3
      ID
    
  
  
    
      sub
      python
      python
      python
      AWS
      1
    
    
      time
      08:30:00
      08:30:00
      08:30:00
      07:30:00
      2
    
    
      batch
      1
      2
      1
      1
      3
    
    
      location
      hyderabad
      hyderabad
      Chennai
      hyderabad
      4
    
    
      faculty
      Nag
      Nag
      Nag
      xxxxx
      5



In [58]:

    
x.set_index('ID')



In [ ]:

	sub	time	batch	location	faculty
0	python	08:30:00	1	hyderabad	Nag
1	python	08:30:00	2	hyderabad	Nag
2	python	08:30:00	1	Chennai	Nag
3	AWS	07:30:00	1	hyderabad	xxxxx