In [1]:
import pandas as pd
In [67]:
opsim = pd.read_csv('output_opsim2_168.dat', delimiter=r"\s+")
In [75]:
opsim.propID.unique()
Out[75]:
array([357, 358, 356, 361, 360, 359, 362])
In [78]:
def closestFields(opsimOut, ra, dec):
"""
Parameters
----------
opsimOut: `~pd.DataFrame` having opSim output
ra: RA in degrees
dec: Dec in degrees
"""
ra = np.radians(ra)
dec = np.radians(dec)
opsimOut['fieldRA'] - ra
Out[78]:
array([-1.5708 , -1.52836 , -1.48829 , -1.47891 , -1.44511 , -1.4307 ,
-1.40526 , -1.39975 , -1.38265 , -1.38264 , -1.36252 , -1.35325 ,
-1.3345 , -1.32195 , -1.31801 , -1.30606 , -1.2862 , -1.27922 ,
-1.27229 , -1.25838 , -1.23802 , -1.2377 , -1.23496 , -1.23495 ,
-1.22571 , -1.2103 , -1.19504 , -1.18953 , -1.189 , -1.17845 ,
-1.16189 , -1.15314 , -1.15066 , -1.14319 , -1.14009 , -1.13067 ,
-1.11321 , -1.10965 , -1.10516 , -1.09611 , -1.09102 , -1.08247 ,
-1.06687 , -1.06484 , -1.06432 , -1.05873 , -1.04846 , -1.04183 ,
-1.03397 , -1.02258 , -1.02257 , -1.01893 , -1.01531 , -1.01156 ,
-1.00036 , -0.992611, -0.985272, -0.978625, -0.977006, -0.972101,
-0.9721 , -0.966267, -0.966265, -0.963797, -0.963795, -0.951958,
-0.943427, -0.936476, -0.933238, -0.930367, -0.930365, -0.924532,
-0.924531, -0.917265, -0.917263, -0.915584, -0.903353, -0.894354,
-0.8879 , -0.887673, -0.887671, -0.886648, -0.882847, -0.882845,
-0.87637 , -0.876368, -0.86838 , -0.867034, -0.867032, -0.854628,
-0.854627, -0.845448, -0.841268, -0.841266, -0.839072, -0.83892 ,
-0.834604, -0.834602, -0.827714, -0.827712, -0.819662, -0.818202,
-0.8182 , -0.805817, -0.805815, -0.796741, -0.794554, -0.794552,
-0.793594, -0.793592, -0.790683, -0.790248, -0.785732, -0.778598,
-0.778597, -0.771137, -0.771135, -0.769078, -0.769076, -0.756888,
-0.748244, -0.746802, -0.745078, -0.745076, -0.741639, -0.741638,
-0.741589, -0.741587, -0.73626 , -0.728979, -0.722804, -0.722802,
-0.719568, -0.707741, -0.70774 , -0.699942, -0.699034, -0.69824 ,
-0.695849, -0.695847, -0.693016, -0.691833, -0.686154, -0.678746,
-0.678745, -0.674643, -0.674641, -0.669493, -0.669491, -0.65815 ,
-0.6518 , -0.65053 , -0.650528, -0.649013, -0.645978, -0.644203,
-0.644201, -0.641422, -0.635345, -0.627741, -0.627739, -0.626596,
-0.626594, -0.618574, -0.618573, -0.607725, -0.607724, -0.603743,
-0.603741, -0.602193, -0.601447, -0.599215, -0.59551 , -0.595508,
-0.594793, -0.590352, -0.583778, -0.583776, -0.578517, -0.575821,
-0.575819, -0.566518, -0.555894, -0.555538, -0.555537, -0.553388,
-0.553386, -0.551894, -0.551892, -0.548922, -0.544504, -0.543878,
-0.543876, -0.538682, -0.53868 , -0.531502, -0.5315 , -0.529758,
-0.523011, -0.513259, -0.506084, -0.504955, -0.504203, -0.504201,
-0.502281, -0.501955, -0.501953, -0.498246, -0.493129, -0.490071,
-0.486661, -0.486659, -0.478899, -0.476347, -0.469907, -0.463648,
-0.463647, -0.459746, -0.45624 , -0.456238, -0.454719, -0.454718,
-0.451709, -0.44852 , -0.447272, -0.44727 , -0.441474, -0.43666 ,
-0.436659, -0.4344 , -0.429166, -0.426138, -0.426136, -0.416809,
-0.416807, -0.408116, -0.407346, -0.407344, -0.406674, -0.406672,
-0.405061, -0.40506 , -0.401323, -0.401321, -0.396488, -0.396223,
-0.389873, -0.389872, -0.388638, -0.382409, -0.381145, -0.381143,
-0.374024, -0.365071, -0.365069, -0.359939, -0.358409, -0.356251,
-0.355408, -0.355407, -0.351045, -0.348822, -0.345462, -0.34546 ,
-0.340943, -0.340941, -0.338836, -0.338835, -0.331407, -0.323528,
-0.323526, -0.315768, -0.315767, -0.312595, -0.311838, -0.311836,
-0.309602, -0.3096 , -0.308862, -0.30886 , -0.306 , -0.305998,
-0.301401, -0.301195, -0.298113, -0.298111, -0.295397, -0.295395,
-0.288855, -0.281906, -0.274999, -0.268608, -0.265424, -0.265423,
-0.263957, -0.263955, -0.261674, -0.261117, -0.261115, -0.25715 ,
-0.257148, -0.257075, -0.252052, -0.24631 , -0.246308, -0.240147,
-0.233942, -0.228029, -0.228027, -0.221615, -0.219248, -0.218531,
-0.218529, -0.216691, -0.216431, -0.214798, -0.213112, -0.21311 ,
-0.208795, -0.208794, -0.203741, -0.19824 , -0.198238, -0.192613,
-0.192611, -0.187127, -0.187125, -0.181197, -0.181195, -0.176226,
-0.176224, -0.173377, -0.173376, -0.173319, -0.173318, -0.173317,
-0.171989, -0.171988, -0.169338, -0.169337, -0.165646, -0.165645,
-0.165644, -0.161175, -0.156209, -0.156208, -0.156207, -0.151046,
-0.151045, -0.14592 , -0.145919, -0.140422, -0.140421, -0.14042 ,
-0.135554, -0.135553, -0.132125, -0.132124, -0.130868, -0.130867,
-0.128524, -0.128523, -0.127823, -0.127822, -0.125799, -0.122655,
-0.122654, -0.118663, -0.118662, -0.114112, -0.114111, -0.109295,
-0.109294, -0.104445, -0.09931 , -0.099309, -0.094596, -0.094595,
-0.090907, -0.090906, -0.088843, -0.08394 , -0.08253 , -0.079883,
-0.079882, -0.076277, -0.072026, -0.072025, -0.067435, -0.067434,
-0.062758, -0.062757, -0.057902, -0.053329, -0.049506, -0.049505,
-0.046915, -0.046914, -0.045991, -0.04599 , -0.040284, -0.040283,
-0.039527, -0.039526, -0.037368, -0.037367, -0.03409 , -0.034089,
-0.030037, -0.025554, -0.025553, -0.020936, -0.020935, -0.016257,
-0.016256, -0.011771, -0.01177 , -0.007847, -0.007846, -0.004883,
-0.004882, -0.003271, -0.00327 , 0.00327 , 0.003271, 0.004882,
0.004883, 0.007846, 0.007847, 0.01177 , 0.011771, 0.016256,
0.016257, 0.020935, 0.020936, 0.025553, 0.025554, 0.030037,
0.034089, 0.03409 , 0.037367, 0.037368, 0.039526, 0.039527,
0.040283, 0.040284, 0.04599 , 0.045991, 0.046914, 0.046915,
0.049505, 0.049506, 0.053329, 0.057902, 0.062757, 0.062758,
0.067434, 0.067435, 0.072025, 0.072026, 0.076277, 0.079882,
0.079883, 0.08253 , 0.08394 , 0.088843, 0.090906, 0.090907,
0.094595, 0.094596, 0.099309, 0.09931 , 0.104445, 0.109294,
0.109295, 0.114111, 0.114112, 0.118662, 0.118663, 0.122654,
0.122655, 0.125799, 0.127822, 0.127823, 0.128523, 0.128524,
0.130867, 0.130868, 0.132124, 0.132125, 0.135553, 0.135554,
0.14042 , 0.140421, 0.140422, 0.145919, 0.14592 , 0.151045,
0.151046, 0.156207, 0.156208, 0.156209, 0.161175, 0.165644,
0.165645, 0.165646, 0.169337, 0.169338, 0.171988, 0.171989,
0.173317, 0.173318, 0.173319, 0.173376, 0.173377, 0.176224,
0.176226, 0.181195, 0.181197, 0.187125, 0.187127, 0.192611,
0.192613, 0.198238, 0.19824 , 0.203741, 0.208794, 0.208795,
0.21311 , 0.213112, 0.214798, 0.216431, 0.216691, 0.218529,
0.218531, 0.219248, 0.221615, 0.228027, 0.228029, 0.233942,
0.240147, 0.246308, 0.24631 , 0.252052, 0.257075, 0.257148,
0.25715 , 0.261115, 0.261117, 0.261674, 0.263955, 0.263957,
0.265423, 0.265424, 0.268608, 0.274999, 0.281906, 0.288855,
0.295395, 0.295397, 0.298111, 0.298113, 0.301195, 0.301401,
0.305998, 0.306 , 0.30886 , 0.308862, 0.3096 , 0.309602,
0.311836, 0.311838, 0.312595, 0.315767, 0.315768, 0.323526,
0.323528, 0.331407, 0.338835, 0.338836, 0.340941, 0.340943,
0.34546 , 0.345462, 0.348822, 0.351045, 0.355407, 0.355408,
0.356251, 0.358409, 0.359939, 0.365069, 0.365071, 0.374024,
0.381143, 0.381145, 0.382409, 0.388638, 0.389873, 0.396223,
0.396488, 0.401323, 0.40506 , 0.406674, 0.407346, 0.408116,
0.416809, 0.426136, 0.429166, 0.4344 , 0.43666 , 0.441474,
0.44727 , 0.447272, 0.44852 , 0.451709, 0.454718, 0.454719,
0.456238, 0.45624 , 0.459746, 0.463648, 0.469907, 0.476347,
0.478899, 0.486659, 0.486661, 0.490071, 0.493129, 0.498246,
0.501953, 0.502281, 0.504201, 0.504203, 0.504955, 0.506084,
0.513259, 0.523011, 0.529758, 0.531502, 0.538682, 0.543876,
0.544504, 0.548922, 0.551892, 0.553388, 0.555537, 0.555894,
0.566518, 0.575819, 0.575821, 0.618574, 0.627739])
In [53]:
df = pd.read_csv('136496.dat', skiprows=2, delimiter=r"\s+", names=['time', 'filters'])
In [60]:
df.dtypes.time = np.float
df.dtypes.filters = np.int
In [76]:
opsim.columns
Out[76]:
Index([u'obsHistID', u'sessionID', u'propID', u'fieldID', u'filter', u'seqnNum', u'subseq', u'pairNum', u'expDate', u'expMJD', u'night', u'expTime', u'slewTime', u'slewDist', u'rotSkyPos', u'rotTelPos', u'fldVisits', u'fldInt', u'fldFltrInt', u'propRank', u'finRank', u'maxSeeing', u'rawSeeing', u'seeing', u'xparency', u'cldSeeing', u'airmass', u'VskyBright', u'filtSky', u'fieldRA', u'fieldDec', u'lst', u'altitude', u'azimuth', u'dist2Moon', u'moonRA', u'moonDec', u'moonAlt', u'moonPhase', u'sunAlt', u'sunAz', u'phaseAngle', u'rScatter', u'mieScatter', u'moonIllum', u'moonBright', u'darkBright', u'solarElong', u'5sigma', u'perry_skybrightness', u'5sigma_ps', u'skybrightness_modified', u'5sigma_modified', u'hexdithra', u'hexdithdec', u'vertex'], dtype='object')
In [63]:
df.filename = '136496.dat'
In [66]:
df
Out[66]:
time
filters
0
262076
6
1
263320
6
2
263358
6
3
954080
5
4
954118
5
5
955492
5
6
955531
5
7
1737953
6
8
16534294
1
9
16534718
1
10
17480500
6
11
17825845
6
12
17826153
6
13
17826192
6
14
17828816
6
15
17830206
6
16
17830245
6
17
17831176
6
18
17831253
6
19
18175258
5
20
20757171
4
21
20758823
4
22
20758900
4
23
20759777
4
24
20759897
4
25
20761286
4
26
22498012
5
27
22498051
5
28
22585034
5
29
22654331
6
...
...
...
1505
310971082
5
1506
310971121
5
1507
310971157
5
1508
310971195
5
1509
310972388
5
1510
310972427
5
1511
310972773
5
1512
310972811
5
1513
310973157
5
1514
310973195
5
1515
311221916
4
1516
311223710
4
1517
311230237
4
1518
311230276
4
1519
311231703
4
1520
311231857
4
1521
312170386
2
1522
312173169
2
1523
312947069
4
1524
312949262
4
1525
313291384
5
1526
313291463
5
1527
313385065
5
1528
313385104
5
1529
313391092
5
1530
313391205
5
1531
313391475
5
1532
313464912
5
1533
313464948
5
1534
313465220
5
1535 rows × 2 columns
In [56]:
df.dtypes
Out[56]:
time int64
filters int64
dtype: object
In [48]:
type(df)
Out[48]:
pandas.core.frame.DataFrame
In [45]:
data = np.loadtxt('136496.dat', skiprows=2, dtype=[np.float, np.int], co=['time', 'filterindex'])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-45-7be6576cda10> in <module>()
----> 1 data = np.loadtxt('136496.dat', skiprows=2, dtype=[np.float, np.int], columns=['time', 'filterindex'])
TypeError: loadtxt() got an unexpected keyword argument 'columns'
In [22]:
import numpy as np
In [23]:
help(np.loadtxt)
Help on function loadtxt in module numpy.lib.npyio:
loadtxt(fname, dtype=<type 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0)
Load data from a text file.
Each row in the text file must have the same number of values.
Parameters
----------
fname : file or str
File, filename, or generator to read. If the filename extension is
``.gz`` or ``.bz2``, the file is first decompressed. Note that
generators should return byte strings for Python 3k.
dtype : data-type, optional
Data-type of the resulting array; default: float. If this is a
record data-type, the resulting array will be 1-dimensional, and
each row will be interpreted as an element of the array. In this
case, the number of columns used must match the number of fields in
the data-type.
comments : str, optional
The character used to indicate the start of a comment;
default: '#'.
delimiter : str, optional
The string used to separate values. By default, this is any
whitespace.
converters : dict, optional
A dictionary mapping column number to a function that will convert
that column to a float. E.g., if column 0 is a date string:
``converters = {0: datestr2num}``. Converters can also be used to
provide a default value for missing data (but see also `genfromtxt`):
``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None.
skiprows : int, optional
Skip the first `skiprows` lines; default: 0.
usecols : sequence, optional
Which columns to read, with 0 being the first. For example,
``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
The default, None, results in all columns being read.
unpack : bool, optional
If True, the returned array is transposed, so that arguments may be
unpacked using ``x, y, z = loadtxt(...)``. When used with a record
data-type, arrays are returned for each field. Default is False.
ndmin : int, optional
The returned array will have at least `ndmin` dimensions.
Otherwise mono-dimensional axes will be squeezed.
Legal values: 0 (default), 1 or 2.
.. versionadded:: 1.6.0
Returns
-------
out : ndarray
Data read from the text file.
See Also
--------
load, fromstring, fromregex
genfromtxt : Load data with missing values handled as specified.
scipy.io.loadmat : reads MATLAB data files
Notes
-----
This function aims to be a fast reader for simply formatted files. The
`genfromtxt` function provides more sophisticated handling of, e.g.,
lines with missing values.
Examples
--------
>>> from StringIO import StringIO # StringIO behaves like a file object
>>> c = StringIO("0 1\n2 3")
>>> np.loadtxt(c)
array([[ 0., 1.],
[ 2., 3.]])
>>> d = StringIO("M 21 72\nF 35 58")
>>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
... 'formats': ('S1', 'i4', 'f4')})
array([('M', 21, 72.0), ('F', 35, 58.0)],
dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
>>> c = StringIO("1,0,2\n3,0,4")
>>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
>>> x
array([ 1., 3.])
>>> y
array([ 2., 4.])
In [ ]:
In [8]:
help(pd.DataFrame.from_csv)
Help on method from_csv in module pandas.core.frame:
from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=False, infer_datetime_format=False) method of __builtin__.type instance
Read delimited file into DataFrame
Parameters
----------
path : string file path or file handle / StringIO
header : int, default 0
Row to use at header (skip prior rows)
sep : string, default ','
Field delimiter
index_col : int or sequence, default 0
Column to use for index. If a sequence is given, a MultiIndex
is used. Different default from read_table
parse_dates : boolean, default True
Parse dates. Different default from read_table
tupleize_cols : boolean, default False
write multi_index columns as a list of tuples (if True)
or new (expanded format) if False)
infer_datetime_format: boolean, default False
If True and `parse_dates` is True for a column, try to infer the
datetime format based on the first datetime string. If the format
can be inferred, there often will be a large parsing speed-up.
Notes
-----
Preferable to use read_table for most general purposes but from_csv
makes for an easy roundtrip to and from file, especially with a
DataFrame of time series data
Returns
-------
y : DataFrame
In [3]:
help(pd.DataFrame)
Help on class DataFrame in module pandas.core.frame:
class DataFrame(pandas.core.generic.NDFrame)
| Two-dimensional size-mutable, potentially heterogeneous tabular data
| structure with labeled axes (rows and columns). Arithmetic operations
| align on both row and column labels. Can be thought of as a dict-like
| container for Series objects. The primary pandas data structure
|
| Parameters
| ----------
| data : numpy ndarray (structured or homogeneous), dict, or DataFrame
| Dict can contain Series, arrays, constants, or list-like objects
| index : Index or array-like
| Index to use for resulting frame. Will default to np.arange(n) if
| no indexing information part of input data and no index provided
| columns : Index or array-like
| Column labels to use for resulting frame. Will default to
| np.arange(n) if no column labels are provided
| dtype : dtype, default None
| Data type to force, otherwise infer
| copy : boolean, default False
| Copy data from inputs. Only affects DataFrame / 2d ndarray input
|
| Examples
| --------
| >>> d = {'col1': ts1, 'col2': ts2}
| >>> df = DataFrame(data=d, index=index)
| >>> df2 = DataFrame(np.random.randn(10, 5))
| >>> df3 = DataFrame(np.random.randn(10, 5),
| ... columns=['a', 'b', 'c', 'd', 'e'])
|
| See also
| --------
| DataFrame.from_records : constructor from tuples, also record arrays
| DataFrame.from_dict : from dicts of Series, arrays, or dicts
| DataFrame.from_csv : from CSV files
| DataFrame.from_items : from sequence of (key, value) pairs
| pandas.read_csv, pandas.read_table, pandas.read_clipboard
|
| Method resolution order:
| DataFrame
| pandas.core.generic.NDFrame
| pandas.core.base.PandasObject
| pandas.core.base.StringMixin
| __builtin__.object
|
| Methods defined here:
|
| __add__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __add__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __and__(self, other, axis='columns', level=None, fill_value=None)
| Binary operator __and__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __div__ = __truediv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __truediv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __eq__(self, other)
| Wrapper for comparison method __eq__
|
| __floordiv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __floordiv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __ge__(self, other)
| Wrapper for comparison method __ge__
|
| __getitem__(self, key)
|
| __gt__(self, other)
| Wrapper for comparison method __gt__
|
| __iadd__ = f(self, other)
|
| __idiv__ = __truediv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __truediv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __imul__ = f(self, other)
|
| __init__(self, data=None, index=None, columns=None, dtype=None, copy=False)
|
| __ipow__ = f(self, other)
|
| __isub__ = f(self, other)
|
| __itruediv__ = f(self, other)
|
| __le__(self, other)
| Wrapper for comparison method __le__
|
| __len__(self)
| Returns length of info axis, but here we use the index
|
| __lt__(self, other)
| Wrapper for comparison method __lt__
|
| __mod__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __mod__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __mul__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __mul__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __ne__(self, other)
| Wrapper for comparison method __ne__
|
| __or__(self, other, axis='columns', level=None, fill_value=None)
| Binary operator __or__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __pow__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __pow__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __radd__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __radd__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rand__(self, other, axis='columns', level=None, fill_value=None)
| Binary operator __rand__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rdiv__ = __rtruediv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rtruediv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rfloordiv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rfloordiv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rmod__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rmod__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rmul__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rmul__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __ror__(self, other, axis='columns', level=None, fill_value=None)
| Binary operator __ror__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rpow__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rpow__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rsub__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rsub__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rtruediv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __rtruediv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __rxor__(self, other, axis='columns', level=None, fill_value=None)
| Binary operator __rxor__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __setitem__(self, key, value)
|
| __sub__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __sub__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __truediv__(self, other, axis=None, level=None, fill_value=None)
| Binary operator __truediv__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| __unicode__(self)
| Return a string representation for a particular DataFrame
|
| Invoked by unicode(df) in py2 only. Yields a Unicode String in both
| py2/py3.
|
| __xor__(self, other, axis='columns', level=None, fill_value=None)
| Binary operator __xor__ with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| add(self, other, axis='columns', level=None, fill_value=None)
| Binary operator add with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| all(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs)
| Return whether all elements are True over requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| bool_only : boolean, default None
| Include only boolean data. If None, will attempt to use everything,
| then use only boolean data
|
| Returns
| -------
| all : Series or DataFrame (if level specified)
|
| any(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs)
| Return whether any element is True over requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| bool_only : boolean, default None
| Include only boolean data. If None, will attempt to use everything,
| then use only boolean data
|
| Returns
| -------
| any : Series or DataFrame (if level specified)
|
| append(self, other, ignore_index=False, verify_integrity=False)
| Append rows of `other` to the end of this frame, returning a new
| object. Columns not in this frame are added as new columns.
|
| Parameters
| ----------
| other : DataFrame or Series/dict-like object, or list of these
| The data to append.
| ignore_index : boolean, default False
| If True, do not use the index labels.
| verify_integrity : boolean, default False
| If True, raise ValueError on creating index with duplicates.
|
| Returns
| -------
| appended : DataFrame
|
| Notes
| -----
| If a list of dict/series is passed and the keys are all contained in the
| DataFrame's index, the order of the columns in the resulting DataFrame
| will be unchanged.
|
| See also
| --------
| pandas.concat : General function to concatenate DataFrame, Series
| or Panel objects
|
| Examples
| --------
|
| >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
| >>> df
| A B
| 0 1 2
| 1 3 4
| >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
| >>> df.append(df2)
| A B
| 0 1 2
| 1 3 4
| 0 5 6
| 1 7 8
|
| With `ignore_index` set to True:
|
| >>> df.append(df2, ignore_index=True)
| A B
| 0 1 2
| 1 3 4
| 2 5 6
| 3 7 8
|
| apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)
| Applies function along input axis of DataFrame.
|
| Objects passed to functions are Series objects having index
| either the DataFrame's index (axis=0) or the columns (axis=1).
| Return type depends on whether passed function aggregates, or the
| reduce argument if the DataFrame is empty.
|
| Parameters
| ----------
| func : function
| Function to apply to each column/row
| axis : {0, 1}
| * 0 : apply function to each column
| * 1 : apply function to each row
| broadcast : boolean, default False
| For aggregation functions, return object of same size with values
| propagated
| reduce : boolean or None, default None
| Try to apply reduction procedures. If the DataFrame is empty,
| apply will use reduce to determine whether the result should be a
| Series or a DataFrame. If reduce is None (the default), apply's
| return value will be guessed by calling func an empty Series (note:
| while guessing, exceptions raised by func will be ignored). If
| reduce is True a Series will always be returned, and if False a
| DataFrame will always be returned.
| raw : boolean, default False
| If False, convert each row or column into a Series. If raw=True the
| passed function will receive ndarray objects instead. If you are
| just applying a NumPy reduction function this will achieve much
| better performance
| args : tuple
| Positional arguments to pass to function in addition to the
| array/series
| Additional keyword arguments will be passed as keywords to the function
|
| Notes
| -----
| In the current implementation apply calls func twice on the
| first column/row to decide whether it can take a fast or slow
| code path. This can lead to unexpected behavior if func has
| side-effects, as they will take effect twice for the first
| column/row.
|
| Examples
| --------
| >>> df.apply(numpy.sqrt) # returns DataFrame
| >>> df.apply(numpy.sum, axis=0) # equiv to df.sum(0)
| >>> df.apply(numpy.sum, axis=1) # equiv to df.sum(1)
|
| See also
| --------
| DataFrame.applymap: For elementwise operations
|
| Returns
| -------
| applied : Series or DataFrame
|
| applymap(self, func)
| Apply a function to a DataFrame that is intended to operate
| elementwise, i.e. like doing map(func, series) for each series in the
| DataFrame
|
| Parameters
| ----------
| func : function
| Python function, returns a single value from a single value
|
| Returns
| -------
| applied : DataFrame
|
| See also
| --------
| DataFrame.apply : For operations on rows/columns
|
| assign(self, **kwargs)
| Assign new columns to a DataFrame, returning a new object
| (a copy) with all the original columns in addition to the new ones.
|
| .. versionadded:: 0.16.0
|
| Parameters
| ----------
| kwargs : keyword, value pairs
| keywords are the column names. If the values are
| callable, they are computed on the DataFrame and
| assigned to the new columns. If the values are
| not callable, (e.g. a Series, scalar, or array),
| they are simply assigned.
|
| Returns
| -------
| df : DataFrame
| A new DataFrame with the new columns in addition to
| all the existing columns.
|
| Notes
| -----
| Since ``kwargs`` is a dictionary, the order of your
| arguments may not be preserved, and so the order of the
| new columns is not well defined. Assigning multiple
| columns within the same ``assign`` is possible, but you cannot
| reference other columns created within the same ``assign`` call.
|
| Examples
| --------
| >>> df = DataFrame({'A': range(1, 11), 'B': np.random.randn(10)})
|
| Where the value is a callable, evaluated on `df`:
|
| >>> df.assign(ln_A = lambda x: np.log(x.A))
| A B ln_A
| 0 1 0.426905 0.000000
| 1 2 -0.780949 0.693147
| 2 3 -0.418711 1.098612
| 3 4 -0.269708 1.386294
| 4 5 -0.274002 1.609438
| 5 6 -0.500792 1.791759
| 6 7 1.649697 1.945910
| 7 8 -1.495604 2.079442
| 8 9 0.549296 2.197225
| 9 10 -0.758542 2.302585
|
| Where the value already exists and is inserted:
|
| >>> newcol = np.log(df['A'])
| >>> df.assign(ln_A=newcol)
| A B ln_A
| 0 1 0.426905 0.000000
| 1 2 -0.780949 0.693147
| 2 3 -0.418711 1.098612
| 3 4 -0.269708 1.386294
| 4 5 -0.274002 1.609438
| 5 6 -0.500792 1.791759
| 6 7 1.649697 1.945910
| 7 8 -1.495604 2.079442
| 8 9 0.549296 2.197225
| 9 10 -0.758542 2.302585
|
| boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds)
| Make a box plot from DataFrame column optionally grouped by some columns or
| other inputs
|
| Parameters
| ----------
| data : the pandas object holding the data
| column : column name or list of names, or vector
| Can be any valid input to groupby
| by : string or sequence
| Column in the DataFrame to group by
| ax : Matplotlib axes object, optional
| fontsize : int or string
| rot : label rotation angle
| figsize : A tuple (width, height) in inches
| grid : Setting this to True will show the grid
| layout : tuple (optional)
| (rows, columns) for the layout of the plot
| return_type : {'axes', 'dict', 'both'}, default 'dict'
| The kind of object to return. 'dict' returns a dictionary
| whose values are the matplotlib Lines of the boxplot;
| 'axes' returns the matplotlib axes the boxplot is drawn on;
| 'both' returns a namedtuple with the axes and dict.
|
| When grouping with ``by``, a dict mapping columns to ``return_type``
| is returned.
|
| kwds : other plotting keyword arguments to be passed to matplotlib boxplot
| function
|
| Returns
| -------
| lines : dict
| ax : matplotlib Axes
| (ax, lines): namedtuple
|
| Notes
| -----
| Use ``return_type='dict'`` when you want to tweak the appearance
| of the lines after plotting. In this case a dict containing the Lines
| making up the boxes, caps, fliers, medians, and whiskers is returned.
|
| combine(self, other, func, fill_value=None, overwrite=True)
| Add two DataFrame objects and do not propagate NaN values, so if for a
| (column, time) one frame is missing a value, it will default to the
| other frame's value (which might be NaN as well)
|
| Parameters
| ----------
| other : DataFrame
| func : function
| fill_value : scalar value
| overwrite : boolean, default True
| If True then overwrite values for common keys in the calling frame
|
| Returns
| -------
| result : DataFrame
|
| combineAdd(self, other)
| Add two DataFrame objects and do not propagate
| NaN values, so if for a (column, time) one frame is missing a
| value, it will default to the other frame's value (which might
| be NaN as well)
|
| Parameters
| ----------
| other : DataFrame
|
| Returns
| -------
| DataFrame
|
| combineMult(self, other)
| Multiply two DataFrame objects and do not propagate NaN values, so if
| for a (column, time) one frame is missing a value, it will default to
| the other frame's value (which might be NaN as well)
|
| Parameters
| ----------
| other : DataFrame
|
| Returns
| -------
| DataFrame
|
| combine_first(self, other)
| Combine two DataFrame objects and default to non-null values in frame
| calling the method. Result index columns will be the union of the
| respective indexes and columns
|
| Parameters
| ----------
| other : DataFrame
|
| Examples
| --------
| a's values prioritized, use values from b to fill holes:
|
| >>> a.combine_first(b)
|
|
| Returns
| -------
| combined : DataFrame
|
| compound(self, axis=None, skipna=None, level=None)
| Return the compound percentage of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| compounded : Series or DataFrame (if level specified)
|
| corr(self, method='pearson', min_periods=1)
| Compute pairwise correlation of columns, excluding NA/null values
|
| Parameters
| ----------
| method : {'pearson', 'kendall', 'spearman'}
| * pearson : standard correlation coefficient
| * kendall : Kendall Tau correlation coefficient
| * spearman : Spearman rank correlation
| min_periods : int, optional
| Minimum number of observations required per pair of columns
| to have a valid result. Currently only available for pearson
| and spearman correlation
|
| Returns
| -------
| y : DataFrame
|
| corrwith(self, other, axis=0, drop=False)
| Compute pairwise correlation between rows or columns of two DataFrame
| objects.
|
| Parameters
| ----------
| other : DataFrame
| axis : {0, 1}
| 0 to compute column-wise, 1 for row-wise
| drop : boolean, default False
| Drop missing indices from result, default returns union of all
|
| Returns
| -------
| correls : Series
|
| count(self, axis=0, level=None, numeric_only=False)
| Return Series with number of non-NA/null observations over requested
| axis. Works with non-floating point data as well (detects NaN and None)
|
| Parameters
| ----------
| axis : {0, 1}
| 0 for row-wise, 1 for column-wise
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a DataFrame
| numeric_only : boolean, default False
| Include only float, int, boolean data
|
| Returns
| -------
| count : Series (or DataFrame if level specified)
|
| cov(self, min_periods=None)
| Compute pairwise covariance of columns, excluding NA/null values
|
| Parameters
| ----------
| min_periods : int, optional
| Minimum number of observations required per pair of columns
| to have a valid result.
|
| Returns
| -------
| y : DataFrame
|
| Notes
| -----
| `y` contains the covariance matrix of the DataFrame's time series.
| The covariance is normalized by N-1 (unbiased estimator).
|
| cummax = max(self, axis=None, dtype=None, out=None, skipna=True, **kwargs)
| Return cumulative max over requested axis.
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| max : Series
|
| cummin = min(self, axis=None, dtype=None, out=None, skipna=True, **kwargs)
| Return cumulative min over requested axis.
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| min : Series
|
| cumprod = prod(self, axis=None, dtype=None, out=None, skipna=True, **kwargs)
| Return cumulative prod over requested axis.
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| prod : Series
|
| cumsum = sum(self, axis=None, dtype=None, out=None, skipna=True, **kwargs)
| Return cumulative sum over requested axis.
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| sum : Series
|
| diff(self, periods=1)
| 1st discrete difference of object
|
| Parameters
| ----------
| periods : int, default 1
| Periods to shift for forming difference
|
| Returns
| -------
| diffed : DataFrame
|
| div = truediv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator truediv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| divide = truediv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator truediv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| dot(self, other)
| Matrix multiplication with DataFrame or Series objects
|
| Parameters
| ----------
| other : DataFrame or Series
|
| Returns
| -------
| dot_product : DataFrame or Series
|
| drop_duplicates(*args, **kwargs)
| Return DataFrame with duplicate rows removed, optionally only
| considering certain columns
|
| Parameters
| ----------
| subset : column label or sequence of labels, optional
| Only consider certain columns for identifying duplicates, by
| default use all of the columns
| take_last : boolean, default False
| Take the last observed row in a row. Defaults to the first row
| inplace : boolean, default False
| Whether to drop duplicates in place or to return a copy
| cols : kwargs only argument of subset [deprecated]
|
| Returns
| -------
| deduplicated : DataFrame
|
| dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False)
| Return object with labels on given axis omitted where alternately any
| or all of the data are missing
|
| Parameters
| ----------
| axis : {0, 1}, or tuple/list thereof
| Pass tuple or list to drop on multiple axes
| how : {'any', 'all'}
| * any : if any NA values are present, drop that label
| * all : if all values are NA, drop that label
| thresh : int, default None
| int value : require that many non-NA values
| subset : array-like
| Labels along other axis to consider, e.g. if you are dropping rows
| these would be a list of columns to include
| inplace : boolean, defalt False
| If True, do operation inplace and return None.
|
| Returns
| -------
| dropped : DataFrame
|
| duplicated(*args, **kwargs)
| Return boolean Series denoting duplicate rows, optionally only
| considering certain columns
|
| Parameters
| ----------
| subset : column label or sequence of labels, optional
| Only consider certain columns for identifying duplicates, by
| default use all of the columns
| take_last : boolean, default False
| For a set of distinct duplicate rows, flag all but the last row as
| duplicated. Default is for all but the first row to be flagged
| cols : kwargs only argument of subset [deprecated]
|
| Returns
| -------
| duplicated : Series
|
| eq(self, other, axis='columns', level=None)
| Wrapper for flexible comparison methods eq
|
| eval(self, expr, **kwargs)
| Evaluate an expression in the context of the calling DataFrame
| instance.
|
| Parameters
| ----------
| expr : string
| The expression string to evaluate.
| kwargs : dict
| See the documentation for :func:`~pandas.eval` for complete details
| on the keyword arguments accepted by
| :meth:`~pandas.DataFrame.query`.
|
| Returns
| -------
| ret : ndarray, scalar, or pandas object
|
| See Also
| --------
| pandas.DataFrame.query
| pandas.eval
|
| Notes
| -----
| For more details see the API documentation for :func:`~pandas.eval`.
| For detailed examples see :ref:`enhancing performance with eval
| <enhancingperf.eval>`.
|
| Examples
| --------
| >>> from numpy.random import randn
| >>> from pandas import DataFrame
| >>> df = DataFrame(randn(10, 2), columns=list('ab'))
| >>> df.eval('a + b')
| >>> df.eval('c = a + b')
|
| first_valid_index(self)
| Return label for first non-NA/null value
|
| floordiv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator floordiv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| ge(self, other, axis='columns', level=None)
| Wrapper for flexible comparison methods ge
|
| get_value(self, index, col, takeable=False)
| Quickly retrieve single value at passed column and index
|
| Parameters
| ----------
| index : row label
| col : column label
| takeable : interpret the index/col as indexers, default False
|
| Returns
| -------
| value : scalar value
|
| gt(self, other, axis='columns', level=None)
| Wrapper for flexible comparison methods gt
|
| hist = hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds)
| Draw histogram of the DataFrame's series using matplotlib / pylab.
|
| Parameters
| ----------
| data : DataFrame
| column : string or sequence
| If passed, will be used to limit data to a subset of columns
| by : object, optional
| If passed, then used to form histograms for separate groups
| grid : boolean, default True
| Whether to show axis grid lines
| xlabelsize : int, default None
| If specified changes the x-axis label size
| xrot : float, default None
| rotation of x axis labels
| ylabelsize : int, default None
| If specified changes the y-axis label size
| yrot : float, default None
| rotation of y axis labels
| ax : matplotlib axes object, default None
| sharex : bool, if True, the X axis will be shared amongst all subplots.
| sharey : bool, if True, the Y axis will be shared amongst all subplots.
| figsize : tuple
| The size of the figure to create in inches by default
| layout: (optional) a tuple (rows, columns) for the layout of the histograms
| bins: integer, default 10
| Number of histogram bins to be used
| kwds : other plotting keyword arguments
| To be passed to hist function
|
| icol(self, i)
|
| idxmax(self, axis=0, skipna=True)
| Return index of first occurrence of maximum over requested axis.
| NA/null values are excluded.
|
| Parameters
| ----------
| axis : {0, 1}
| 0 for row-wise, 1 for column-wise
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be first index.
|
| Returns
| -------
| idxmax : Series
|
| Notes
| -----
| This method is the DataFrame version of ``ndarray.argmax``.
|
| See Also
| --------
| Series.idxmax
|
| idxmin(self, axis=0, skipna=True)
| Return index of first occurrence of minimum over requested axis.
| NA/null values are excluded.
|
| Parameters
| ----------
| axis : {0, 1}
| 0 for row-wise, 1 for column-wise
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| idxmin : Series
|
| Notes
| -----
| This method is the DataFrame version of ``ndarray.argmin``.
|
| See Also
| --------
| Series.idxmin
|
| iget_value(self, i, j)
|
| info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None)
| Concise summary of a DataFrame.
|
| Parameters
| ----------
| verbose : {None, True, False}, optional
| Whether to print the full summary.
| None follows the `display.max_info_columns` setting.
| True or False overrides the `display.max_info_columns` setting.
| buf : writable buffer, defaults to sys.stdout
| max_cols : int, default None
| Determines whether full summary or short summary is printed.
| None follows the `display.max_info_columns` setting.
| memory_usage : boolean, default None
| Specifies whether total memory usage of the DataFrame
| elements (including index) should be displayed. None follows
| the `display.memory_usage` setting. True or False overrides
| the `display.memory_usage` setting. Memory usage is shown in
| human-readable units (base-2 representation).
| null_counts : boolean, default None
| Whether to show the non-null counts
| If None, then only show if the frame is smaller than max_info_rows and max_info_columns.
| If True, always show counts.
| If False, never show counts.
|
| insert(self, loc, column, value, allow_duplicates=False)
| Insert column into DataFrame at specified location.
|
| If `allow_duplicates` is False, raises Exception if column
| is already contained in the DataFrame.
|
| Parameters
| ----------
| loc : int
| Must have 0 <= loc <= len(columns)
| column : object
| value : int, Series, or array-like
|
| irow(self, i, copy=False)
|
| isin(self, values)
| Return boolean DataFrame showing whether each element in the
| DataFrame is contained in values.
|
| Parameters
| ----------
| values : iterable, Series, DataFrame or dictionary
| The result will only be true at a location if all the
| labels match. If `values` is a Series, that's the index. If
| `values` is a dictionary, the keys must be the column names,
| which must match. If `values` is a DataFrame,
| then both the index and column labels must match.
|
| Returns
| -------
|
| DataFrame of booleans
|
| Examples
| --------
| When ``values`` is a list:
|
| >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
| >>> df.isin([1, 3, 12, 'a'])
| A B
| 0 True True
| 1 False False
| 2 True False
|
| When ``values`` is a dict:
|
| >>> df = DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]})
| >>> df.isin({'A': [1, 3], 'B': [4, 7, 12]})
| A B
| 0 True False # Note that B didn't match the 1 here.
| 1 False True
| 2 True True
|
| When ``values`` is a Series or DataFrame:
|
| >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
| >>> other = DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']})
| >>> df.isin(other)
| A B
| 0 True False
| 1 False False # Column A in `other` has a 3, but not at index 1.
| 2 True True
|
| iteritems(self)
| Iterator over (column, series) pairs
|
| iterrows(self)
| Iterate over rows of DataFrame as (index, Series) pairs.
|
| Notes
| -----
|
| * ``iterrows`` does **not** preserve dtypes across the rows (dtypes
| are preserved across columns for DataFrames). For example,
|
| >>> df = DataFrame([[1, 1.0]], columns=['x', 'y'])
| >>> row = next(df.iterrows())[1]
| >>> print(row['x'].dtype)
| float64
| >>> print(df['x'].dtype)
| int64
|
| Returns
| -------
| it : generator
| A generator that iterates over the rows of the frame.
|
| itertuples(self, index=True)
| Iterate over rows of DataFrame as tuples, with index value
| as first element of the tuple
|
| join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False)
| Join columns with other DataFrame either on index or on a key
| column. Efficiently Join multiple DataFrame objects by index at once by
| passing a list.
|
| Parameters
| ----------
| other : DataFrame, Series with name field set, or list of DataFrame
| Index should be similar to one of the columns in this one. If a
| Series is passed, its name attribute must be set, and that will be
| used as the column name in the resulting joined DataFrame
| on : column name, tuple/list of column names, or array-like
| Column(s) to use for joining, otherwise join on index. If multiples
| columns given, the passed DataFrame must have a MultiIndex. Can
| pass an array as the join key if not already contained in the
| calling DataFrame. Like an Excel VLOOKUP operation
| how : {'left', 'right', 'outer', 'inner'}
| How to handle indexes of the two objects. Default: 'left'
| for joining on index, None otherwise
|
| * left: use calling frame's index
| * right: use input frame's index
| * outer: form union of indexes
| * inner: use intersection of indexes
| lsuffix : string
| Suffix to use from left frame's overlapping columns
| rsuffix : string
| Suffix to use from right frame's overlapping columns
| sort : boolean, default False
| Order result DataFrame lexicographically by the join key. If False,
| preserves the index order of the calling (left) DataFrame
|
| Notes
| -----
| on, lsuffix, and rsuffix options are not supported when passing a list
| of DataFrame objects
|
| Returns
| -------
| joined : DataFrame
|
| kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return unbiased kurtosis over requested axis using Fishers definition of
| kurtosis (kurtosis of normal == 0.0). Normalized by N-1
|
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| kurt : Series or DataFrame (if level specified)
|
| kurtosis = kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
|
| last_valid_index(self)
| Return label for last non-NA/null value
|
| le(self, other, axis='columns', level=None)
| Wrapper for flexible comparison methods le
|
| lookup(self, row_labels, col_labels)
| Label-based "fancy indexing" function for DataFrame.
| Given equal-length arrays of row and column labels, return an
| array of the values corresponding to each (row, col) pair.
|
| Parameters
| ----------
| row_labels : sequence
| The row labels to use for lookup
| col_labels : sequence
| The column labels to use for lookup
|
| Notes
| -----
| Akin to::
|
| result = []
| for row, col in zip(row_labels, col_labels):
| result.append(df.get_value(row, col))
|
| Examples
| --------
| values : ndarray
| The found values
|
| lt(self, other, axis='columns', level=None)
| Wrapper for flexible comparison methods lt
|
| mad(self, axis=None, skipna=None, level=None)
| Return the mean absolute deviation of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| mad : Series or DataFrame (if level specified)
|
| max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| This method returns the maximum of the values in the object. If you
| want the *index* of the maximum, use ``idxmax``. This is the
| equivalent of the ``numpy.ndarray`` method ``argmax``.
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| max : Series or DataFrame (if level specified)
|
| mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the mean of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| mean : Series or DataFrame (if level specified)
|
| median(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the median of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| median : Series or DataFrame (if level specified)
|
| memory_usage(self, index=False)
| Memory usage of DataFrame columns.
|
| Parameters
| ----------
| index : bool
| Specifies whether to include memory usage of DataFrame's
| index in returned Series. If `index=True` (default is False)
| the first index of the Series is `Index`.
|
| Returns
| -------
| sizes : Series
| A series with column names as index and memory usage of
| columns with units of bytes.
|
| Notes
| -----
| Memory usage does not include memory consumed by elements that
| are not components of the array.
|
| See Also
| --------
| numpy.ndarray.nbytes
|
| merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True)
| Merge DataFrame objects by performing a database-style join operation by
| columns or indexes.
|
| If joining columns on columns, the DataFrame indexes *will be
| ignored*. Otherwise if joining indexes on indexes or indexes on a column or
| columns, the index will be passed on.
|
| Parameters
| ----------
| right : DataFrame
| how : {'left', 'right', 'outer', 'inner'}, default 'inner'
| * left: use only keys from left frame (SQL: left outer join)
| * right: use only keys from right frame (SQL: right outer join)
| * outer: use union of keys from both frames (SQL: full outer join)
| * inner: use intersection of keys from both frames (SQL: inner join)
| on : label or list
| Field names to join on. Must be found in both DataFrames. If on is
| None and not merging on indexes, then it merges on the intersection of
| the columns by default.
| left_on : label or list, or array-like
| Field names to join on in left DataFrame. Can be a vector or list of
| vectors of the length of the DataFrame to use a particular vector as
| the join key instead of columns
| right_on : label or list, or array-like
| Field names to join on in right DataFrame or vector/list of vectors per
| left_on docs
| left_index : boolean, default False
| Use the index from the left DataFrame as the join key(s). If it is a
| MultiIndex, the number of keys in the other DataFrame (either the index
| or a number of columns) must match the number of levels
| right_index : boolean, default False
| Use the index from the right DataFrame as the join key. Same caveats as
| left_index
| sort : boolean, default False
| Sort the join keys lexicographically in the result DataFrame
| suffixes : 2-length sequence (tuple, list, ...)
| Suffix to apply to overlapping column names in the left and right
| side, respectively
| copy : boolean, default True
| If False, do not copy data unnecessarily
|
| Examples
| --------
|
| >>> A >>> B
| lkey value rkey value
| 0 foo 1 0 foo 5
| 1 bar 2 1 bar 6
| 2 baz 3 2 qux 7
| 3 foo 4 3 bar 8
|
| >>> merge(A, B, left_on='lkey', right_on='rkey', how='outer')
| lkey value_x rkey value_y
| 0 foo 1 foo 5
| 1 foo 4 foo 5
| 2 bar 2 bar 6
| 3 bar 2 bar 8
| 4 baz 3 NaN NaN
| 5 NaN NaN qux 7
|
| Returns
| -------
| merged : DataFrame
| The output type will the be same as 'left', if it is a subclass
| of DataFrame.
|
| min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| This method returns the minimum of the values in the object. If you
| want the *index* of the minimum, use ``idxmin``. This is the
| equivalent of the ``numpy.ndarray`` method ``argmin``.
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| min : Series or DataFrame (if level specified)
|
| mod(self, other, axis='columns', level=None, fill_value=None)
| Binary operator mod with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| mode(self, axis=0, numeric_only=False)
| Gets the mode of each element along the axis selected. Empty if nothing
| has 2+ occurrences. Adds a row for each mode per label, fills in gaps
| with nan.
|
| Parameters
| ----------
| axis : {0, 1, 'index', 'columns'} (default 0)
| * 0/'index' : get mode of each column
| * 1/'columns' : get mode of each row
| numeric_only : boolean, default False
| if True, only apply to numeric columns
|
| Returns
| -------
| modes : DataFrame (sorted)
|
| mul(self, other, axis='columns', level=None, fill_value=None)
| Binary operator mul with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| multiply = mul(self, other, axis='columns', level=None, fill_value=None)
| Binary operator mul with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| ne(self, other, axis='columns', level=None)
| Wrapper for flexible comparison methods ne
|
| pivot(self, index=None, columns=None, values=None)
| Reshape data (produce a "pivot" table) based on column values. Uses
| unique values from index / columns to form axes and return either
| DataFrame or Panel, depending on whether you request a single value
| column (DataFrame) or all columns (Panel)
|
| Parameters
| ----------
| index : string or object
| Column name to use to make new frame's index
| columns : string or object
| Column name to use to make new frame's columns
| values : string or object, optional
| Column name to use for populating new frame's values
|
| Notes
| -----
| For finer-tuned control, see hierarchical indexing documentation along
| with the related stack/unstack methods
|
| Examples
| --------
| >>> df
| foo bar baz
| 0 one A 1.
| 1 one B 2.
| 2 one C 3.
| 3 two A 4.
| 4 two B 5.
| 5 two C 6.
|
| >>> df.pivot('foo', 'bar', 'baz')
| A B C
| one 1 2 3
| two 4 5 6
|
| >>> df.pivot('foo', 'bar')['baz']
| A B C
| one 1 2 3
| two 4 5 6
|
| Returns
| -------
| pivoted : DataFrame
| If no values column specified, will have hierarchically indexed
| columns
|
| pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True)
| Create a spreadsheet-style pivot table as a DataFrame. The levels in the
| pivot table will be stored in MultiIndex objects (hierarchical indexes) on
| the index and columns of the result DataFrame
|
| Parameters
| ----------
| data : DataFrame
| values : column to aggregate, optional
| index : a column, Grouper, array which has the same length as data, or list of them.
| Keys to group by on the pivot table index.
| If an array is passed, it is being used as the same manner as column values.
| columns : a column, Grouper, array which has the same length as data, or list of them.
| Keys to group by on the pivot table column.
| If an array is passed, it is being used as the same manner as column values.
| aggfunc : function, default numpy.mean, or list of functions
| If list of functions passed, the resulting pivot table will have
| hierarchical columns whose top level are the function names (inferred
| from the function objects themselves)
| fill_value : scalar, default None
| Value to replace missing values with
| margins : boolean, default False
| Add all row / columns (e.g. for subtotal / grand totals)
| dropna : boolean, default True
| Do not include columns whose entries are all NaN
|
| Examples
| --------
| >>> df
| A B C D
| 0 foo one small 1
| 1 foo one large 2
| 2 foo one large 2
| 3 foo two small 3
| 4 foo two small 3
| 5 bar one large 4
| 6 bar one small 5
| 7 bar two small 6
| 8 bar two large 7
|
| >>> table = pivot_table(df, values='D', index=['A', 'B'],
| ... columns=['C'], aggfunc=np.sum)
| >>> table
| small large
| foo one 1 4
| two 6 NaN
| bar one 5 4
| two 6 7
|
| Returns
| -------
| table : DataFrame
|
| plot = plot_frame(data, x=None, y=None, kind='line', ax=None, subplots=False, sharex=True, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False, **kwds)
| Make plots of DataFrame using matplotlib / pylab.
|
| Parameters
| ----------
| data : DataFrame
| x : label or position, default None
| y : label or position, default None
| Allows plotting of one column versus another
| kind : str
| - 'line' : line plot (default)
| - 'bar' : vertical bar plot
| - 'barh' : horizontal bar plot
| - 'hist' : histogram
| - 'box' : boxplot
| - 'kde' : Kernel Density Estimation plot
| - 'density' : same as 'kde'
| - 'area' : area plot
| - 'pie' : pie plot
| - 'scatter' : scatter plot
| - 'hexbin' : hexbin plot
| ax : matplotlib axes object, default None
| subplots : boolean, default False
| Make separate subplots for each column
| sharex : boolean, default True
| In case subplots=True, share x axis
| sharey : boolean, default False
| In case subplots=True, share y axis
| layout : tuple (optional)
| (rows, columns) for the layout of subplots
| figsize : a tuple (width, height) in inches
| use_index : boolean, default True
| Use index as ticks for x axis
| title : string
| Title to use for the plot
| grid : boolean, default None (matlab style default)
| Axis grid lines
| legend : False/True/'reverse'
| Place legend on axis subplots
| style : list or dict
| matplotlib line style per column
| logx : boolean, default False
| Use log scaling on x axis
| logy : boolean, default False
| Use log scaling on y axis
| loglog : boolean, default False
| Use log scaling on both x and y axes
| xticks : sequence
| Values to use for the xticks
| yticks : sequence
| Values to use for the yticks
| xlim : 2-tuple/list
| ylim : 2-tuple/list
| rot : int, default None
| Rotation for ticks (xticks for vertical, yticks for horizontal plots)
| fontsize : int, default None
| Font size for xticks and yticks
| colormap : str or matplotlib colormap object, default None
| Colormap to select colors from. If string, load colormap with that name
| from matplotlib.
| colorbar : boolean, optional
| If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots)
| position : float
| Specify relative alignments for bar plot layout.
| From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
| layout : tuple (optional)
| (rows, columns) for the layout of the plot
| table : boolean, Series or DataFrame, default False
| If True, draw a table using the data in the DataFrame and the data will
| be transposed to meet matplotlib's default layout.
| If a Series or DataFrame is passed, use passed data to draw a table.
| yerr : DataFrame, Series, array-like, dict and str
| See :ref:`Plotting with Error Bars <visualization.errorbars>` for detail.
| xerr : same types as yerr.
| stacked : boolean, default False in line and
| bar plots, and True in area plot. If True, create stacked plot.
| sort_columns : boolean, default False
| Sort column names to determine plot ordering
| secondary_y : boolean or sequence, default False
| Whether to plot on the secondary y-axis
| If a list/tuple, which columns to plot on secondary y-axis
| mark_right : boolean, default True
| When using a secondary_y axis, automatically mark the column
| labels with "(right)" in the legend
| kwds : keywords
| Options to pass to matplotlib plotting method
|
| Returns
| -------
| axes : matplotlib.AxesSubplot or np.array of them
|
| Notes
| -----
|
| - See matplotlib documentation online for more on this subject
| - If `kind` = 'bar' or 'barh', you can specify relative alignments
| for bar plot layout by `position` keyword.
| From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
| - If `kind` = 'scatter' and the argument `c` is the name of a dataframe
| column, the values of that column are used to color each point.
| - If `kind` = 'hexbin', you can control the size of the bins with the
| `gridsize` argument. By default, a histogram of the counts around each
| `(x, y)` point is computed. You can specify alternative aggregations
| by passing values to the `C` and `reduce_C_function` arguments.
| `C` specifies the value at each `(x, y)` point and `reduce_C_function`
| is a function of one argument that reduces all the values in a bin to
| a single number (e.g. `mean`, `max`, `sum`, `std`).
|
| pow(self, other, axis='columns', level=None, fill_value=None)
| Binary operator pow with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| prod(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the product of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| prod : Series or DataFrame (if level specified)
|
| product = prod(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
|
| quantile(self, q=0.5, axis=0, numeric_only=True)
| Return values at the given quantile over requested axis, a la
| numpy.percentile.
|
| Parameters
| ----------
| q : float or array-like, default 0.5 (50% quantile)
| 0 <= q <= 1, the quantile(s) to compute
| axis : {0, 1}
| 0 for row-wise, 1 for column-wise
|
| Returns
| -------
| quantiles : Series or DataFrame
| If ``q`` is an array, a DataFrame will be returned where the
| index is ``q``, the columns are the columns of self, and the
| values are the quantiles.
| If ``q`` is a float, a Series will be returned where the
| index is the columns of self and the values are the quantiles.
|
| Examples
| --------
|
| >>> df = DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
| columns=['a', 'b'])
| >>> df.quantile(.1)
| a 1.3
| b 3.7
| dtype: float64
| >>> df.quantile([.1, .5])
| a b
| 0.1 1.3 3.7
| 0.5 2.5 55.0
|
| query(self, expr, **kwargs)
| Query the columns of a frame with a boolean expression.
|
| .. versionadded:: 0.13
|
| Parameters
| ----------
| expr : string
| The query string to evaluate. You can refer to variables
| in the environment by prefixing them with an '@' character like
| ``@a + b``.
| kwargs : dict
| See the documentation for :func:`pandas.eval` for complete details
| on the keyword arguments accepted by :meth:`DataFrame.query`.
|
| Returns
| -------
| q : DataFrame
|
| Notes
| -----
| The result of the evaluation of this expression is first passed to
| :attr:`DataFrame.loc` and if that fails because of a
| multidimensional key (e.g., a DataFrame) then the result will be passed
| to :meth:`DataFrame.__getitem__`.
|
| This method uses the top-level :func:`pandas.eval` function to
| evaluate the passed query.
|
| The :meth:`~pandas.DataFrame.query` method uses a slightly
| modified Python syntax by default. For example, the ``&`` and ``|``
| (bitwise) operators have the precedence of their boolean cousins,
| :keyword:`and` and :keyword:`or`. This *is* syntactically valid Python,
| however the semantics are different.
|
| You can change the semantics of the expression by passing the keyword
| argument ``parser='python'``. This enforces the same semantics as
| evaluation in Python space. Likewise, you can pass ``engine='python'``
| to evaluate an expression using Python itself as a backend. This is not
| recommended as it is inefficient compared to using ``numexpr`` as the
| engine.
|
| The :attr:`DataFrame.index` and
| :attr:`DataFrame.columns` attributes of the
| :class:`~pandas.DataFrame` instance are placed in the query namespace
| by default, which allows you to treat both the index and columns of the
| frame as a column in the frame.
| The identifier ``index`` is used for the frame index; you can also
| use the name of the index to identify it in a query.
|
| For further details and examples see the ``query`` documentation in
| :ref:`indexing <indexing.query>`.
|
| See Also
| --------
| pandas.eval
| DataFrame.eval
|
| Examples
| --------
| >>> from numpy.random import randn
| >>> from pandas import DataFrame
| >>> df = DataFrame(randn(10, 2), columns=list('ab'))
| >>> df.query('a > b')
| >>> df[df.a > df.b] # same result as the previous expression
|
| radd(self, other, axis='columns', level=None, fill_value=None)
| Binary operator radd with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| rank(self, axis=0, numeric_only=None, method='average', na_option='keep', ascending=True, pct=False)
| Compute numerical data ranks (1 through n) along axis. Equal values are
| assigned a rank that is the average of the ranks of those values
|
| Parameters
| ----------
| axis : {0, 1}, default 0
| Ranks over columns (0) or rows (1)
| numeric_only : boolean, default None
| Include only float, int, boolean data
| method : {'average', 'min', 'max', 'first', 'dense'}
| * average: average rank of group
| * min: lowest rank in group
| * max: highest rank in group
| * first: ranks assigned in order they appear in the array
| * dense: like 'min', but rank always increases by 1 between groups
| na_option : {'keep', 'top', 'bottom'}
| * keep: leave NA values where they are
| * top: smallest rank if ascending
| * bottom: smallest rank if descending
| ascending : boolean, default True
| False for ranks by high (1) to low (N)
| pct : boolean, default False
| Computes percentage rank of data
|
| Returns
| -------
| ranks : DataFrame
|
| rdiv = rtruediv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rtruediv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| reindex(self, index=None, columns=None, **kwargs)
| Conform DataFrame to new index with optional filling logic, placing
| NA/NaN in locations having no value in the previous index. A new object
| is produced unless the new index is equivalent to the current one and
| copy=False
|
| Parameters
| ----------
| index, columns : array-like, optional (can be specified in order, or as
| keywords)
| New labels / index to conform to. Preferably an Index object to
| avoid duplicating data
| method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
| Method to use for filling holes in reindexed DataFrame:
| * default: don't fill gaps
| * pad / ffill: propagate last valid observation forward to next valid
| * backfill / bfill: use next valid observation to fill gap
| * nearest: use nearest valid observations to fill gap
| copy : boolean, default True
| Return a new object, even if the passed indexes are the same
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
| fill_value : scalar, default np.NaN
| Value to use for missing values. Defaults to NaN, but can be any
| "compatible" value
| limit : int, default None
| Maximum size gap to forward or backward fill
|
| Examples
| --------
| >>> df.reindex(index=[date1, date2, date3], columns=['A', 'B', 'C'])
|
| Returns
| -------
| reindexed : DataFrame
|
| reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, limit=None, fill_value=nan)
| Conform input object to new index with optional filling logic,
| placing NA/NaN in locations having no value in the previous index. A
| new object is produced unless the new index is equivalent to the
| current one and copy=False
|
| Parameters
| ----------
| labels : array-like
| New labels / index to conform to. Preferably an Index object to
| avoid duplicating data
| axis : {0,1,'index','columns'}
| method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
| Method to use for filling holes in reindexed DataFrame:
| * default: don't fill gaps
| * pad / ffill: propagate last valid observation forward to next valid
| * backfill / bfill: use next valid observation to fill gap
| * nearest: use nearest valid observations to fill gap
| copy : boolean, default True
| Return a new object, even if the passed indexes are the same
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
| limit : int, default None
| Maximum size gap to forward or backward fill
|
| Examples
| --------
| >>> df.reindex_axis(['A', 'B', 'C'], axis=1)
|
| See also
| --------
| reindex, reindex_like
|
| Returns
| -------
| reindexed : DataFrame
|
| rename(self, index=None, columns=None, **kwargs)
| Alter axes input function or functions. Function / dict values must be
| unique (1-to-1). Labels not contained in a dict / Series will be left
| as-is.
|
| Parameters
| ----------
| index, columns : dict-like or function, optional
| Transformation to apply to that axis values
|
| copy : boolean, default True
| Also copy underlying data
| inplace : boolean, default False
| Whether to return a new DataFrame. If True then value of copy is
| ignored.
|
| Returns
| -------
| renamed : DataFrame (new object)
|
| reorder_levels(self, order, axis=0)
| Rearrange index levels using input order.
| May not drop or duplicate levels
|
| Parameters
| ----------
| order : list of int or list of str
| List representing new level order. Reference level by number
| (position) or by key (label).
| axis : int
| Where to reorder levels.
|
| Returns
| -------
| type of caller (new object)
|
| reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill='')
| For DataFrame with multi-level index, return new DataFrame with
| labeling information in the columns under the index names, defaulting
| to 'level_0', 'level_1', etc. if any are None. For a standard index,
| the index name will be used (if set), otherwise a default 'index' or
| 'level_0' (if 'index' is already taken) will be used.
|
| Parameters
| ----------
| level : int, str, tuple, or list, default None
| Only remove the given levels from the index. Removes all levels by
| default
| drop : boolean, default False
| Do not try to insert index into dataframe columns. This resets
| the index to the default integer index.
| inplace : boolean, default False
| Modify the DataFrame in place (do not create a new object)
| col_level : int or str, default 0
| If the columns have multiple levels, determines which level the
| labels are inserted into. By default it is inserted into the first
| level.
| col_fill : object, default ''
| If the columns have multiple levels, determines how the other
| levels are named. If None then the index name is repeated.
|
| Returns
| -------
| resetted : DataFrame
|
| rfloordiv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rfloordiv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| rmod(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rmod with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| rmul(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rmul with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| rpow(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rpow with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| rsub(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rsub with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| rtruediv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator rtruediv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| select_dtypes(self, include=None, exclude=None)
| Return a subset of a DataFrame including/excluding columns based on
| their ``dtype``.
|
| Parameters
| ----------
| include, exclude : list-like
| A list of dtypes or strings to be included/excluded. You must pass
| in a non-empty sequence for at least one of these.
|
| Raises
| ------
| ValueError
| * If both of ``include`` and ``exclude`` are empty
| * If ``include`` and ``exclude`` have overlapping elements
| * If any kind of string dtype is passed in.
| TypeError
| * If either of ``include`` or ``exclude`` is not a sequence
|
| Returns
| -------
| subset : DataFrame
| The subset of the frame including the dtypes in ``include`` and
| excluding the dtypes in ``exclude``.
|
| Notes
| -----
| * To select all *numeric* types use the numpy dtype ``numpy.number``
| * To select strings you must use the ``object`` dtype, but note that
| this will return *all* object dtype columns
| * See the `numpy dtype hierarchy
| <http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html>`__
| * To select Pandas categorical dtypes, use 'category'
|
| Examples
| --------
| >>> df = pd.DataFrame({'a': np.random.randn(6).astype('f4'),
| ... 'b': [True, False] * 3,
| ... 'c': [1.0, 2.0] * 3})
| >>> df
| a b c
| 0 0.3962 True 1
| 1 0.1459 False 2
| 2 0.2623 True 1
| 3 0.0764 False 2
| 4 -0.9703 True 1
| 5 -1.2094 False 2
| >>> df.select_dtypes(include=['float64'])
| c
| 0 1
| 1 2
| 2 1
| 3 2
| 4 1
| 5 2
| >>> df.select_dtypes(exclude=['floating'])
| b
| 0 True
| 1 False
| 2 True
| 3 False
| 4 True
| 5 False
|
| sem(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)
| Return unbiased standard error of the mean over requested axis.
|
| Normalized by N-1 by default. This can be changed using the ddof argument
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| sem : Series or DataFrame (if level specified)
|
| set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False)
| Set the DataFrame index (row labels) using one or more existing
| columns. By default yields a new object.
|
| Parameters
| ----------
| keys : column label or list of column labels / arrays
| drop : boolean, default True
| Delete columns to be used as the new index
| append : boolean, default False
| Whether to append columns to existing index
| inplace : boolean, default False
| Modify the DataFrame in place (do not create a new object)
| verify_integrity : boolean, default False
| Check the new index for duplicates. Otherwise defer the check until
| necessary. Setting to False will improve the performance of this
| method
|
| Examples
| --------
| >>> indexed_df = df.set_index(['A', 'B'])
| >>> indexed_df2 = df.set_index(['A', [0, 1, 2, 0, 1, 2]])
| >>> indexed_df3 = df.set_index([[0, 1, 2, 0, 1, 2]])
|
| Returns
| -------
| dataframe : DataFrame
|
| set_value(self, index, col, value, takeable=False)
| Put single value at passed column and index
|
| Parameters
| ----------
| index : row label
| col : column label
| value : scalar value
| takeable : interpret the index/col as indexers, default False
|
| Returns
| -------
| frame : DataFrame
| If label pair is contained, will be reference to calling DataFrame,
| otherwise a new object
|
| skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return unbiased skew over requested axis
| Normalized by N-1
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| skew : Series or DataFrame (if level specified)
|
| sort(self, columns=None, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last')
| Sort DataFrame either by labels (along either axis) or by the values in
| column(s)
|
| Parameters
| ----------
| columns : object
| Column name(s) in frame. Accepts a column name or a list
| for a nested sort. A tuple will be interpreted as the
| levels of a multi-index.
| ascending : boolean or list, default True
| Sort ascending vs. descending. Specify list for multiple sort
| orders
| axis : {0, 1}
| Sort index/rows versus columns
| inplace : boolean, default False
| Sort the DataFrame without creating a new instance
| kind : {'quicksort', 'mergesort', 'heapsort'}, optional
| This option is only applied when sorting on a single column or label.
| na_position : {'first', 'last'} (optional, default='last')
| 'first' puts NaNs at the beginning
| 'last' puts NaNs at the end
|
| Examples
| --------
| >>> result = df.sort(['A', 'B'], ascending=[1, 0])
|
| Returns
| -------
| sorted : DataFrame
|
| sort_index(self, axis=0, by=None, ascending=True, inplace=False, kind='quicksort', na_position='last')
| Sort DataFrame either by labels (along either axis) or by the values in
| a column
|
| Parameters
| ----------
| axis : {0, 1}
| Sort index/rows versus columns
| by : object
| Column name(s) in frame. Accepts a column name or a list
| for a nested sort. A tuple will be interpreted as the
| levels of a multi-index.
| ascending : boolean or list, default True
| Sort ascending vs. descending. Specify list for multiple sort
| orders
| inplace : boolean, default False
| Sort the DataFrame without creating a new instance
| na_position : {'first', 'last'} (optional, default='last')
| 'first' puts NaNs at the beginning
| 'last' puts NaNs at the end
| kind : {'quicksort', 'mergesort', 'heapsort'}, optional
| This option is only applied when sorting on a single column or label.
|
| Examples
| --------
| >>> result = df.sort_index(by=['A', 'B'], ascending=[True, False])
|
| Returns
| -------
| sorted : DataFrame
|
| sortlevel(self, level=0, axis=0, ascending=True, inplace=False, sort_remaining=True)
| Sort multilevel index by chosen axis and primary level. Data will be
| lexicographically sorted by the chosen level followed by the other
| levels (in order)
|
| Parameters
| ----------
| level : int
| axis : {0, 1}
| ascending : boolean, default True
| inplace : boolean, default False
| Sort the DataFrame without creating a new instance
| sort_remaining : boolean, default True
| Sort by the other levels too.
|
| Returns
| -------
| sorted : DataFrame
|
| stack(self, level=-1, dropna=True)
| Pivot a level of the (possibly hierarchical) column labels, returning a
| DataFrame (or Series in the case of an object with a single level of
| column labels) having a hierarchical index with a new inner-most level
| of row labels.
| The level involved will automatically get sorted.
|
| Parameters
| ----------
| level : int, string, or list of these, default last level
| Level(s) to stack, can pass level name
| dropna : boolean, default True
| Whether to drop rows in the resulting Frame/Series with no valid
| values
|
| Examples
| ----------
| >>> s
| a b
| one 1. 2.
| two 3. 4.
|
| >>> s.stack()
| one a 1
| b 2
| two a 3
| b 4
|
| Returns
| -------
| stacked : DataFrame or Series
|
| std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)
| Return unbiased standard deviation over requested axis.
|
| Normalized by N-1 by default. This can be changed using the ddof argument
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| std : Series or DataFrame (if level specified)
|
| sub(self, other, axis='columns', level=None, fill_value=None)
| Binary operator sub with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| subtract = sub(self, other, axis='columns', level=None, fill_value=None)
| Binary operator sub with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| sum(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the sum of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| sum : Series or DataFrame (if level specified)
|
| swaplevel(self, i, j, axis=0)
| Swap levels i and j in a MultiIndex on a particular axis
|
| Parameters
| ----------
| i, j : int, string (can be mixed)
| Level of index to be swapped. Can pass level name as string.
|
| Returns
| -------
| swapped : type of caller (new object)
|
| to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=False, date_format=None, doublequote=True, escapechar=None, decimal='.', **kwds)
| Write DataFrame to a comma-separated values (csv) file
|
| Parameters
| ----------
| path_or_buf : string or file handle, default None
| File path or object, if None is provided the result is returned as
| a string.
| sep : character, default ","
| Field delimiter for the output file.
| na_rep : string, default ''
| Missing data representation
| float_format : string, default None
| Format string for floating point numbers
| columns : sequence, optional
| Columns to write
| header : boolean or list of string, default True
| Write out column names. If a list of string is given it is assumed
| to be aliases for the column names
| index : boolean, default True
| Write row names (index)
| index_label : string or sequence, or False, default None
| Column label for index column(s) if desired. If None is given, and
| `header` and `index` are True, then the index names are used. A
| sequence should be given if the DataFrame uses MultiIndex. If
| False do not print fields for index names. Use index_label=False
| for easier importing in R
| nanRep : None
| deprecated, use na_rep
| mode : str
| Python write mode, default 'w'
| encoding : string, optional
| A string representing the encoding to use in the output file,
| defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
| line_terminator : string, default '\\n'
| The newline character or character sequence to use in the output
| file
| quoting : optional constant from csv module
| defaults to csv.QUOTE_MINIMAL
| quotechar : string (length 1), default '"'
| character used to quote fields
| doublequote : boolean, default True
| Control quoting of `quotechar` inside a field
| escapechar : string (length 1), default None
| character used to escape `sep` and `quotechar` when appropriate
| chunksize : int or None
| rows to write at a time
| tupleize_cols : boolean, default False
| write multi_index columns as a list of tuples (if True)
| or new (expanded format) if False)
| date_format : string, default None
| Format string for datetime objects
| decimal: string, default '.'
| Character recognized as decimal separator. E.g. use ',' for European data
|
| to_dict(*args, **kwargs)
| Convert DataFrame to dictionary.
|
| Parameters
| ----------
| orient : str {'dict', 'list', 'series', 'split', 'records'}
| Determines the type of the values of the dictionary.
|
| - dict (default) : dict like {column -> {index -> value}}
| - list : dict like {column -> [values]}
| - series : dict like {column -> Series(values)}
| - split : dict like
| {index -> [index], columns -> [columns], data -> [values]}
| - records : list like
| [{column -> value}, ... , {column -> value}]
|
| Abbreviations are allowed. `s` indicates `series` and `sp`
| indicates `split`.
|
| Returns
| -------
| result : dict like {column -> {index -> value}}
|
| to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf')
| Write DataFrame to a excel sheet
|
| Parameters
| ----------
| excel_writer : string or ExcelWriter object
| File path or existing ExcelWriter
| sheet_name : string, default 'Sheet1'
| Name of sheet which will contain DataFrame
| na_rep : string, default ''
| Missing data representation
| float_format : string, default None
| Format string for floating point numbers
| columns : sequence, optional
| Columns to write
| header : boolean or list of string, default True
| Write out column names. If a list of string is given it is
| assumed to be aliases for the column names
| index : boolean, default True
| Write row names (index)
| index_label : string or sequence, default None
| Column label for index column(s) if desired. If None is given, and
| `header` and `index` are True, then the index names are used. A
| sequence should be given if the DataFrame uses MultiIndex.
| startrow :
| upper left cell row to dump data frame
| startcol :
| upper left cell column to dump data frame
| engine : string, default None
| write engine to use - you can also set this via the options
| ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
| ``io.excel.xlsm.writer``.
| merge_cells : boolean, default True
| Write MultiIndex and Hierarchical Rows as merged cells.
| encoding: string, default None
| encoding of the resulting excel file. Only necessary for xlwt,
| other writers support unicode natively.
| inf_rep : string, default 'inf'
| Representation for infinity (there is no native representation for
| infinity in Excel)
|
| Notes
| -----
| If passing an existing ExcelWriter object, then the sheet will be added
| to the existing workbook. This can be used to save different
| DataFrames to one workbook:
|
| >>> writer = ExcelWriter('output.xlsx')
| >>> df1.to_excel(writer,'Sheet1')
| >>> df2.to_excel(writer,'Sheet2')
| >>> writer.save()
|
| to_gbq(self, destination_table, project_id=None, chunksize=10000, verbose=True, reauth=False)
| Write a DataFrame to a Google BigQuery table.
|
| THIS IS AN EXPERIMENTAL LIBRARY
|
| If the table exists, the dataframe will be written to the table using
| the defined table schema and column types. For simplicity, this method
| uses the Google BigQuery streaming API. The to_gbq method chunks data
| into a default chunk size of 10,000. Failures return the complete error
| response which can be quite long depending on the size of the insert.
| There are several important limitations of the Google streaming API
| which are detailed at:
| https://developers.google.com/bigquery/streaming-data-into-bigquery.
|
| Parameters
| ----------
| dataframe : DataFrame
| DataFrame to be written
| destination_table : string
| Name of table to be written, in the form 'dataset.tablename'
| project_id : str
| Google BigQuery Account project ID.
| chunksize : int (default 10000)
| Number of rows to be inserted in each chunk from the dataframe.
| verbose : boolean (default True)
| Show percentage complete
| reauth : boolean (default False)
| Force Google BigQuery to reauthenticate the user. This is useful
| if multiple accounts are used.
|
| to_html(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False)
| Render a DataFrame as an HTML table.
|
| `to_html`-specific options:
|
| bold_rows : boolean, default True
| Make the row labels bold in the output
| classes : str or list or tuple, default None
| CSS class(es) to apply to the resulting html table
| escape : boolean, default True
| Convert the characters <, >, and & to HTML-safe sequences.=
| max_rows : int, optional
| Maximum number of rows to show before truncating. If None, show
| all.
| max_cols : int, optional
| Maximum number of columns to show before truncating. If None, show
| all.
|
|
| Parameters
| ----------
| frame : DataFrame
| object to render
| buf : StringIO-like, optional
| buffer to write to
| columns : sequence, optional
| the subset of columns to write; default None writes all columns
| col_space : int, optional
| the minimum width of each column
| header : bool, optional
| whether to print column labels, default True
| index : bool, optional
| whether to print index (row) labels, default True
| na_rep : string, optional
| string representation of NAN to use, default 'NaN'
| formatters : list or dict of one-parameter functions, optional
| formatter functions to apply to columns' elements by position or name,
| default None. The result of each function must be a unicode string.
| List must be of length equal to the number of columns.
| float_format : one-parameter function, optional
| formatter function to apply to columns' elements if they are floats,
| default None. The result of this function must be a unicode string.
| sparsify : bool, optional
| Set to False for a DataFrame with a hierarchical index to print every
| multiindex key at each row, default True
| justify : {'left', 'right'}, default None
| Left or right-justify the column labels. If None uses the option from
| the print configuration (controlled by set_option), 'right' out
| of the box.
| index_names : bool, optional
| Prints the names of the indexes, default True
| force_unicode : bool, default False
| Always return a unicode result. Deprecated in v0.10.0 as string
| formatting is now rendered to unicode by default.
|
| Returns
| -------
| formatted : string (or unicode, depending on data and options)
|
| to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, bold_rows=True, longtable=False, escape=True)
| Render a DataFrame to a tabular environment table. You can splice
| this into a LaTeX document. Requires \usepackage{booktabs}.
|
| `to_latex`-specific options:
|
| bold_rows : boolean, default True
| Make the row labels bold in the output
| longtable : boolean, default False
| Use a longtable environment instead of tabular. Requires adding
| a \usepackage{longtable} to your LaTeX preamble.
| escape : boolean, default True
| When set to False prevents from escaping latex special
| characters in column names.
|
|
| Parameters
| ----------
| frame : DataFrame
| object to render
| buf : StringIO-like, optional
| buffer to write to
| columns : sequence, optional
| the subset of columns to write; default None writes all columns
| col_space : int, optional
| the minimum width of each column
| header : bool, optional
| whether to print column labels, default True
| index : bool, optional
| whether to print index (row) labels, default True
| na_rep : string, optional
| string representation of NAN to use, default 'NaN'
| formatters : list or dict of one-parameter functions, optional
| formatter functions to apply to columns' elements by position or name,
| default None. The result of each function must be a unicode string.
| List must be of length equal to the number of columns.
| float_format : one-parameter function, optional
| formatter function to apply to columns' elements if they are floats,
| default None. The result of this function must be a unicode string.
| sparsify : bool, optional
| Set to False for a DataFrame with a hierarchical index to print every
| multiindex key at each row, default True
| justify : {'left', 'right'}, default None
| Left or right-justify the column labels. If None uses the option from
| the print configuration (controlled by set_option), 'right' out
| of the box.
| index_names : bool, optional
| Prints the names of the indexes, default True
| force_unicode : bool, default False
| Always return a unicode result. Deprecated in v0.10.0 as string
| formatting is now rendered to unicode by default.
|
| Returns
| -------
| formatted : string (or unicode, depending on data and options)
|
| to_panel(self)
| Transform long (stacked) format (DataFrame) into wide (3D, Panel)
| format.
|
| Currently the index of the DataFrame must be a 2-level MultiIndex. This
| may be generalized later
|
| Returns
| -------
| panel : Panel
|
| to_period(self, freq=None, axis=0, copy=True)
| Convert DataFrame from DatetimeIndex to PeriodIndex with desired
| frequency (inferred from index if not passed)
|
| Parameters
| ----------
| freq : string, default
| axis : {0, 1}, default 0
| The axis to convert (the index by default)
| copy : boolean, default True
| If False then underlying input data is not copied
|
| Returns
| -------
| ts : TimeSeries with PeriodIndex
|
| to_records(self, index=True, convert_datetime64=True)
| Convert DataFrame to record array. Index will be put in the
| 'index' field of the record array if requested
|
| Parameters
| ----------
| index : boolean, default True
| Include index in resulting record array, stored in 'index' field
| convert_datetime64 : boolean, default True
| Whether to convert the index to datetime.datetime if it is a
| DatetimeIndex
|
| Returns
| -------
| y : recarray
|
| to_sparse(self, fill_value=None, kind='block')
| Convert to SparseDataFrame
|
| Parameters
| ----------
| fill_value : float, default NaN
| kind : {'block', 'integer'}
|
| Returns
| -------
| y : SparseDataFrame
|
| to_stata(self, fname, convert_dates=None, write_index=True, encoding='latin-1', byteorder=None, time_stamp=None, data_label=None)
| A class for writing Stata binary dta files from array-like objects
|
| Parameters
| ----------
| fname : file path or buffer
| Where to save the dta file.
| convert_dates : dict
| Dictionary mapping column of datetime types to the stata internal
| format that you want to use for the dates. Options are
| 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either a
| number or a name.
| encoding : str
| Default is latin-1. Note that Stata does not support unicode.
| byteorder : str
| Can be ">", "<", "little", or "big". The default is None which uses
| `sys.byteorder`
|
| Examples
| --------
| >>> writer = StataWriter('./data_file.dta', data)
| >>> writer.write_file()
|
| Or with dates
|
| >>> writer = StataWriter('./date_data_file.dta', data, {2 : 'tw'})
| >>> writer.write_file()
|
| to_string(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, line_width=None, max_rows=None, max_cols=None, show_dimensions=False)
| Render a DataFrame to a console-friendly tabular output.
|
| Parameters
| ----------
| frame : DataFrame
| object to render
| buf : StringIO-like, optional
| buffer to write to
| columns : sequence, optional
| the subset of columns to write; default None writes all columns
| col_space : int, optional
| the minimum width of each column
| header : bool, optional
| whether to print column labels, default True
| index : bool, optional
| whether to print index (row) labels, default True
| na_rep : string, optional
| string representation of NAN to use, default 'NaN'
| formatters : list or dict of one-parameter functions, optional
| formatter functions to apply to columns' elements by position or name,
| default None. The result of each function must be a unicode string.
| List must be of length equal to the number of columns.
| float_format : one-parameter function, optional
| formatter function to apply to columns' elements if they are floats,
| default None. The result of this function must be a unicode string.
| sparsify : bool, optional
| Set to False for a DataFrame with a hierarchical index to print every
| multiindex key at each row, default True
| justify : {'left', 'right'}, default None
| Left or right-justify the column labels. If None uses the option from
| the print configuration (controlled by set_option), 'right' out
| of the box.
| index_names : bool, optional
| Prints the names of the indexes, default True
| force_unicode : bool, default False
| Always return a unicode result. Deprecated in v0.10.0 as string
| formatting is now rendered to unicode by default.
|
| Returns
| -------
| formatted : string (or unicode, depending on data and options)
|
| to_timestamp(self, freq=None, how='start', axis=0, copy=True)
| Cast to DatetimeIndex of timestamps, at *beginning* of period
|
| Parameters
| ----------
| freq : string, default frequency of PeriodIndex
| Desired frequency
| how : {'s', 'e', 'start', 'end'}
| Convention for converting period to timestamp; start of period
| vs. end
| axis : {0, 1} default 0
| The axis to convert (the index by default)
| copy : boolean, default True
| If false then underlying input data is not copied
|
| Returns
| -------
| df : DataFrame with DatetimeIndex
|
| to_wide = wrapper(*args, **kwargs)
|
| transpose(self)
| Transpose index and columns
|
| truediv(self, other, axis='columns', level=None, fill_value=None)
| Binary operator truediv with support to substitute a fill_value for missing data in
| one of the inputs
|
| Parameters
| ----------
| other : Series, DataFrame, or constant
| axis : {0, 1, 'index', 'columns'}
| For Series input, axis to match Series index on
| fill_value : None or float value, default None
| Fill missing (NaN) values with this value. If both DataFrame locations are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Notes
| -----
| Mismatched indices will be unioned together
|
| Returns
| -------
| result : DataFrame
|
| unstack(self, level=-1)
| Pivot a level of the (necessarily hierarchical) index labels, returning
| a DataFrame having a new level of column labels whose inner-most level
| consists of the pivoted index labels. If the index is not a MultiIndex,
| the output will be a Series (the analogue of stack when the columns are
| not a MultiIndex).
| The level involved will automatically get sorted.
|
| Parameters
| ----------
| level : int, string, or list of these, default -1 (last level)
| Level(s) of index to unstack, can pass level name
|
| See also
| --------
| DataFrame.pivot : Pivot a table based on column values.
| DataFrame.stack : Pivot a level of the column labels (inverse operation
| from `unstack`).
|
| Examples
| --------
| >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
| ... ('two', 'a'), ('two', 'b')])
| >>> s = pd.Series(np.arange(1.0, 5.0), index=index)
| >>> s
| one a 1
| b 2
| two a 3
| b 4
| dtype: float64
|
| >>> s.unstack(level=-1)
| a b
| one 1 2
| two 3 4
|
| >>> s.unstack(level=0)
| one two
| a 1 3
| b 2 4
|
| >>> df = s.unstack(level=0)
| >>> df.unstack()
| one a 1.
| b 3.
| two a 2.
| b 4.
|
| Returns
| -------
| unstacked : DataFrame or Series
|
| update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False)
| Modify DataFrame in place using non-NA values from passed
| DataFrame. Aligns on indices
|
| Parameters
| ----------
| other : DataFrame, or object coercible into a DataFrame
| join : {'left'}, default 'left'
| overwrite : boolean, default True
| If True then overwrite values for common keys in the calling frame
| filter_func : callable(1d-array) -> 1d-array<boolean>, default None
| Can choose to replace values other than NA. Return True for values
| that should be updated
| raise_conflict : boolean
| If True, will raise an error if the DataFrame and other both
| contain data in the same place.
|
| var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)
| Return unbiased variance over requested axis.
|
| Normalized by N-1 by default. This can be changed using the ddof argument
|
| Parameters
| ----------
| axis : {index (0), columns (1)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a Series
| numeric_only : boolean, default None
| Include only float, int, boolean data. If None, will attempt to use
| everything, then use only numeric data
|
| Returns
| -------
| var : Series or DataFrame (if level specified)
|
| ----------------------------------------------------------------------
| Class methods defined here:
|
| from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=False, infer_datetime_format=False) from __builtin__.type
| Read delimited file into DataFrame
|
| Parameters
| ----------
| path : string file path or file handle / StringIO
| header : int, default 0
| Row to use at header (skip prior rows)
| sep : string, default ','
| Field delimiter
| index_col : int or sequence, default 0
| Column to use for index. If a sequence is given, a MultiIndex
| is used. Different default from read_table
| parse_dates : boolean, default True
| Parse dates. Different default from read_table
| tupleize_cols : boolean, default False
| write multi_index columns as a list of tuples (if True)
| or new (expanded format) if False)
| infer_datetime_format: boolean, default False
| If True and `parse_dates` is True for a column, try to infer the
| datetime format based on the first datetime string. If the format
| can be inferred, there often will be a large parsing speed-up.
|
| Notes
| -----
| Preferable to use read_table for most general purposes but from_csv
| makes for an easy roundtrip to and from file, especially with a
| DataFrame of time series data
|
| Returns
| -------
| y : DataFrame
|
| from_dict(cls, data, orient='columns', dtype=None) from __builtin__.type
| Construct DataFrame from dict of array-like or dicts
|
| Parameters
| ----------
| data : dict
| {field : array-like} or {field : dict}
| orient : {'columns', 'index'}, default 'columns'
| The "orientation" of the data. If the keys of the passed dict
| should be the columns of the resulting DataFrame, pass 'columns'
| (default). Otherwise if the keys should be rows, pass 'index'.
|
| Returns
| -------
| DataFrame
|
| from_items(cls, items, columns=None, orient='columns') from __builtin__.type
| Convert (key, value) pairs to DataFrame. The keys will be the axis
| index (usually the columns, but depends on the specified
| orientation). The values should be arrays or Series.
|
| Parameters
| ----------
| items : sequence of (key, value) pairs
| Values should be arrays or Series.
| columns : sequence of column labels, optional
| Must be passed if orient='index'.
| orient : {'columns', 'index'}, default 'columns'
| The "orientation" of the data. If the keys of the
| input correspond to column labels, pass 'columns'
| (default). Otherwise if the keys correspond to the index,
| pass 'index'.
|
| Returns
| -------
| frame : DataFrame
|
| from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None) from __builtin__.type
| Convert structured or record ndarray to DataFrame
|
| Parameters
| ----------
| data : ndarray (structured dtype), list of tuples, dict, or DataFrame
| index : string, list of fields, array-like
| Field of array to use as the index, alternately a specific set of
| input labels to use
| exclude : sequence, default None
| Columns or fields to exclude
| columns : sequence, default None
| Column names to use. If the passed data do not have names
| associated with them, this argument provides names for the
| columns. Otherwise this argument indicates the order of the columns
| in the result (any names not found in the data will become all-NA
| columns)
| coerce_float : boolean, default False
| Attempt to convert values to non-string, non-numeric objects (like
| decimal.Decimal) to floating point, useful for SQL result sets
|
| Returns
| -------
| df : DataFrame
|
| ----------------------------------------------------------------------
| Data descriptors defined here:
|
| T
| Transpose index and columns
|
| axes
|
| columns
|
| index
|
| shape
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.generic.NDFrame:
|
| __abs__(self)
|
| __array__(self, dtype=None)
|
| __array_wrap__(self, result, context=None)
|
| __bool__ = __nonzero__(self)
|
| __contains__(self, key)
| True if the key is in the info axis
|
| __delitem__(self, key)
| Delete item
|
| __finalize__(self, other, method=None, **kwargs)
| propagate metadata from other to self
|
| Parameters
| ----------
| other : the object from which to get the attributes that we are going
| to propagate
| method : optional, a passed method name ; possibly to take different
| types of propagation actions based on this
|
| __getattr__(self, name)
| After regular attribute access, try looking up the name
| This allows simpler access to columns for interactive use.
|
| __getstate__(self)
|
| __hash__(self)
|
| __invert__(self)
|
| __iter__(self)
| Iterate over infor axis
|
| __neg__(self)
|
| __nonzero__(self)
|
| __setattr__(self, name, value)
| After regular attribute access, try setting the name
| This allows simpler access to columns for interactive use.
|
| __setstate__(self, state)
|
| abs(self)
| Return an object with absolute value taken. Only applicable to objects
| that are all numeric
|
| Returns
| -------
| abs: type of caller
|
| add_prefix(self, prefix)
| Concatenate prefix string with panel items names.
|
| Parameters
| ----------
| prefix : string
|
| Returns
| -------
| with_prefix : type of caller
|
| add_suffix(self, suffix)
| Concatenate suffix string with panel items names
|
| Parameters
| ----------
| suffix : string
|
| Returns
| -------
| with_suffix : type of caller
|
| align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0)
| Align two object on their axes with the
| specified join method for each axis Index
|
| Parameters
| ----------
| other : DataFrame or Series
| join : {'outer', 'inner', 'left', 'right'}, default 'outer'
| axis : allowed axis of the other object, default None
| Align on index (0), columns (1), or both (None)
| level : int or level name, default None
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
| copy : boolean, default True
| Always returns new objects. If copy=False and no reindexing is
| required then original objects are returned.
| fill_value : scalar, default np.NaN
| Value to use for missing values. Defaults to NaN, but can be any
| "compatible" value
| method : str, default None
| limit : int, default None
| fill_axis : {0, 1}, default 0
| Filling axis, method and limit
|
| Returns
| -------
| (left, right) : (type of input, type of other)
| Aligned objects
|
| as_blocks(self)
| Convert the frame to a dict of dtype -> Constructor Types that each has
| a homogeneous dtype.
|
| NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
| as_matrix)
|
| Returns
| -------
| values : a dict of dtype -> Constructor Types
|
| as_matrix(self, columns=None)
| Convert the frame to its Numpy-array representation.
|
| Parameters
| ----------
| columns: list, optional, default:None
| If None, return all columns, otherwise, returns specified columns.
|
| Returns
| -------
| values : ndarray
| If the caller is heterogeneous and contains booleans or objects,
| the result will be of dtype=object. See Notes.
|
|
| Notes
| -----
| Return is NOT a Numpy-matrix, rather, a Numpy-array.
|
| The dtype will be a lower-common-denominator dtype (implicit
| upcasting); that is to say if the dtypes (even of numeric types)
| are mixed, the one that accommodates all will be chosen. Use this
| with care if you are not dealing with the blocks.
|
| e.g. If the dtypes are float16 and float32, dtype will be upcast to
| float32. If dtypes are int32 and uint8, dtype will be upcase to
| int32.
|
| This method is provided for backwards compatibility. Generally,
| it is recommended to use '.values'.
|
| See Also
| --------
| pandas.DataFrame.values
|
| asfreq(self, freq, method=None, how=None, normalize=False)
| Convert all TimeSeries inside to specified frequency using DateOffset
| objects. Optionally provide fill method to pad/backfill missing values.
|
| Parameters
| ----------
| freq : DateOffset object, or string
| method : {'backfill', 'bfill', 'pad', 'ffill', None}
| Method to use for filling holes in reindexed Series
| pad / ffill: propagate last valid observation forward to next valid
| backfill / bfill: use NEXT valid observation to fill method
| how : {'start', 'end'}, default end
| For PeriodIndex only, see PeriodIndex.asfreq
| normalize : bool, default False
| Whether to reset output index to midnight
|
| Returns
| -------
| converted : type of caller
|
| astype(self, dtype, copy=True, raise_on_error=True, **kwargs)
| Cast object to input numpy.dtype
| Return a copy when copy = True (be really careful with this!)
|
| Parameters
| ----------
| dtype : numpy.dtype or Python type
| raise_on_error : raise on invalid input
| kwargs : keyword arguments to pass on to the constructor
|
| Returns
| -------
| casted : type of caller
|
| at_time(self, time, asof=False)
| Select values at particular time of day (e.g. 9:30AM)
|
| Parameters
| ----------
| time : datetime.time or string
|
| Returns
| -------
| values_at_time : type of caller
|
| between_time(self, start_time, end_time, include_start=True, include_end=True)
| Select values between particular times of the day (e.g., 9:00-9:30 AM)
|
| Parameters
| ----------
| start_time : datetime.time or string
| end_time : datetime.time or string
| include_start : boolean, default True
| include_end : boolean, default True
|
| Returns
| -------
| values_between_time : type of caller
|
| bfill(self, axis=None, inplace=False, limit=None, downcast=None)
| Synonym for NDFrame.fillna(method='bfill')
|
| bool(self)
| Return the bool of a single element PandasObject
| This must be a boolean scalar value, either True or False
|
| Raise a ValueError if the PandasObject does not have exactly
| 1 element, or that element is not boolean
|
| clip(self, lower=None, upper=None, out=None)
| Trim values at input threshold(s)
|
| Parameters
| ----------
| lower : float, default None
| upper : float, default None
|
| Returns
| -------
| clipped : Series
|
| clip_lower(self, threshold)
| Return copy of the input with values below given value truncated
|
| See also
| --------
| clip
|
| Returns
| -------
| clipped : same type as input
|
| clip_upper(self, threshold)
| Return copy of input with values above given value truncated
|
| See also
| --------
| clip
|
| Returns
| -------
| clipped : same type as input
|
| consolidate(self, inplace=False)
| Compute NDFrame with "consolidated" internals (data of each dtype
| grouped together in a single ndarray). Mainly an internal API function,
| but available here to the savvy user
|
| Parameters
| ----------
| inplace : boolean, default False
| If False return new object, otherwise modify existing object
|
| Returns
| -------
| consolidated : type of caller
|
| convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=True, copy=True)
| Attempt to infer better dtype for object columns
|
| Parameters
| ----------
| convert_dates : if True, attempt to soft convert dates, if 'coerce',
| force conversion (and non-convertibles get NaT)
| convert_numeric : if True attempt to coerce to numbers (including
| strings), non-convertibles get NaN
| convert_timedeltas : if True, attempt to soft convert timedeltas, if 'coerce',
| force conversion (and non-convertibles get NaT)
| copy : Boolean, if True, return copy even if no copy is necessary
| (e.g. no conversion was done), default is True.
| It is meant for internal use, not to be confused with `inplace` kw.
|
| Returns
| -------
| converted : asm as input object
|
| copy(self, deep=True)
| Make a copy of this object
|
| Parameters
| ----------
| deep : boolean or string, default True
| Make a deep copy, i.e. also copy data
|
| Returns
| -------
| copy : type of caller
|
| describe(self, percentile_width=None, percentiles=None, include=None, exclude=None)
| Generate various summary statistics, excluding NaN values.
|
| Parameters
| ----------
| percentile_width : float, deprecated
| The ``percentile_width`` argument will be removed in a future
| version. Use ``percentiles`` instead.
| width of the desired uncertainty interval, default is 50,
| which corresponds to lower=25, upper=75
| percentiles : array-like, optional
| The percentiles to include in the output. Should all
| be in the interval [0, 1]. By default `percentiles` is
| [.25, .5, .75], returning the 25th, 50th, and 75th percentiles.
| include, exclude : list-like, 'all', or None (default)
| Specify the form of the returned result. Either:
|
| - None to both (default). The result will include only numeric-typed
| columns or, if none are, only categorical columns.
| - A list of dtypes or strings to be included/excluded.
| To select all numeric types use numpy numpy.number. To select
| categorical objects use type object. See also the select_dtypes
| documentation. eg. df.describe(include=['O'])
| - If include is the string 'all', the output column-set will
| match the input one.
|
| Returns
| -------
| summary: NDFrame of summary statistics
|
| Notes
| -----
| The output DataFrame index depends on the requested dtypes:
|
| For numeric dtypes, it will include: count, mean, std, min,
| max, and lower, 50, and upper percentiles.
|
| For object dtypes (e.g. timestamps or strings), the index
| will include the count, unique, most common, and frequency of the
| most common. Timestamps also include the first and last items.
|
| For mixed dtypes, the index will be the union of the corresponding
| output types. Non-applicable entries will be filled with NaN.
| Note that mixed-dtype outputs can only be returned from mixed-dtype
| inputs and appropriate use of the include/exclude arguments.
|
| If multiple values have the highest count, then the
| `count` and `most common` pair will be arbitrarily chosen from
| among those with the highest count.
|
| The include, exclude arguments are ignored for Series.
|
| See also
| --------
| DataFrame.select_dtypes
|
| drop(self, labels, axis=0, level=None, inplace=False)
| Return new object with labels in requested axis removed
|
| Parameters
| ----------
| labels : single label or list-like
| axis : int or axis name
| level : int or level name, default None
| For MultiIndex
| inplace : bool, default False
| If True, do operation inplace and return None.
|
| Returns
| -------
| dropped : type of caller
|
| equals(self, other)
| Determines if two NDFrame objects contain the same elements. NaNs in the
| same location are considered equal.
|
| ffill(self, axis=None, inplace=False, limit=None, downcast=None)
| Synonym for NDFrame.fillna(method='ffill')
|
| fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None)
| Fill NA/NaN values using the specified method
|
| Parameters
| ----------
| method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
| Method to use for filling holes in reindexed Series
| pad / ffill: propagate last valid observation forward to next valid
| backfill / bfill: use NEXT valid observation to fill gap
| value : scalar, dict, Series, or DataFrame
| Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of
| values specifying which value to use for each index (for a Series) or
| column (for a DataFrame). (values not in the dict/Series/DataFrame will not be
| filled). This value cannot be a list.
| axis : {0, 1}, default 0
| * 0: fill column-by-column
| * 1: fill row-by-row
| inplace : boolean, default False
| If True, fill in place. Note: this will modify any
| other views on this object, (e.g. a no-copy slice for a column in a
| DataFrame).
| limit : int, default None
| Maximum size gap to forward or backward fill
| downcast : dict, default is None
| a dict of item->dtype of what to downcast if possible,
| or the string 'infer' which will try to downcast to an appropriate
| equal type (e.g. float64 to int64 if possible)
|
| See also
| --------
| reindex, asfreq
|
| Returns
| -------
| filled : same type as caller
|
| filter(self, items=None, like=None, regex=None, axis=None)
| Restrict the info axis to set of items or wildcard
|
| Parameters
| ----------
| items : list-like
| List of info axis to restrict to (must not all be present)
| like : string
| Keep info axis where "arg in col == True"
| regex : string (regular expression)
| Keep info axis with re.search(regex, col) == True
| axis : int or None
| The axis to filter on. By default this is the info axis. The "info
| axis" is the axis that is used when indexing with ``[]``. For
| example, ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']``. So,
| the ``DataFrame`` columns are the info axis.
|
| Notes
| -----
| Arguments are mutually exclusive, but this is not checked for
|
| first(self, offset)
| Convenience method for subsetting initial periods of time series data
| based on a date offset
|
| Parameters
| ----------
| offset : string, DateOffset, dateutil.relativedelta
|
| Examples
| --------
| ts.last('10D') -> First 10 days
|
| Returns
| -------
| subset : type of caller
|
| get(self, key, default=None)
| Get item from object for given key (DataFrame column, Panel slice,
| etc.). Returns default value if not found
|
| Parameters
| ----------
| key : object
|
| Returns
| -------
| value : type of items contained in object
|
| get_dtype_counts(self)
| Return the counts of dtypes in this object
|
| get_ftype_counts(self)
| Return the counts of ftypes in this object
|
| get_values(self)
| same as values (but handles sparseness conversions)
|
| groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False)
| Group series using mapper (dict or key function, apply given function
| to group, return result as series) or by a series of columns
|
| Parameters
| ----------
| by : mapping function / list of functions, dict, Series, or tuple /
| list of column names.
| Called on each element of the object index to determine the groups.
| If a dict or Series is passed, the Series or dict VALUES will be
| used to determine the groups
| axis : int, default 0
| level : int, level name, or sequence of such, default None
| If the axis is a MultiIndex (hierarchical), group by a particular
| level or levels
| as_index : boolean, default True
| For aggregated output, return object with group labels as the
| index. Only relevant for DataFrame input. as_index=False is
| effectively "SQL-style" grouped output
| sort : boolean, default True
| Sort group keys. Get better performance by turning this off
| group_keys : boolean, default True
| When calling apply, add group keys to index to identify pieces
| squeeze : boolean, default False
| reduce the dimensionaility of the return type if possible,
| otherwise return a consistent type
|
| Examples
| --------
| # DataFrame result
| >>> data.groupby(func, axis=0).mean()
|
| # DataFrame result
| >>> data.groupby(['col1', 'col2'])['col3'].mean()
|
| # DataFrame with hierarchical index
| >>> data.groupby(['col1', 'col2']).mean()
|
| Returns
| -------
| GroupBy object
|
| head(self, n=5)
| Returns first n rows
|
| interpolate(self, method='linear', axis=0, limit=None, inplace=False, downcast=None, **kwargs)
| Interpolate values according to different methods.
|
| Parameters
| ----------
| method : {'linear', 'time', 'index', 'values', 'nearest', 'zero',
| 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
| 'polynomial', 'spline' 'piecewise_polynomial', 'pchip'}
|
| * 'linear': ignore the index and treat the values as equally
| spaced. default
| * 'time': interpolation works on daily and higher resolution
| data to interpolate given length of interval
| * 'index', 'values': use the actual numerical values of the index
| * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
| 'barycentric', 'polynomial' is passed to
| `scipy.interpolate.interp1d` with the order given both
| 'polynomial' and 'spline' requre that you also specify and order
| (int) e.g. df.interpolate(method='polynomial', order=4)
| * 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all
| wrappers around the scipy interpolation methods of similar
| names. See the scipy documentation for more on their behavior:
| http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
| http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
|
| axis : {0, 1}, default 0
| * 0: fill column-by-column
| * 1: fill row-by-row
| limit : int, default None.
| Maximum number of consecutive NaNs to fill.
| inplace : bool, default False
| Update the NDFrame in place if possible.
| downcast : optional, 'infer' or None, defaults to None
| Downcast dtypes if possible.
|
| Returns
| -------
| Series or DataFrame of same shape interpolated at the NaNs
|
| See Also
| --------
| reindex, replace, fillna
|
| Examples
| --------
|
| # Filling in NaNs:
| >>> s = pd.Series([0, 1, np.nan, 3])
| >>> s.interpolate()
| 0 0
| 1 1
| 2 2
| 3 3
| dtype: float64
|
| isnull(self)
| Return a boolean same-sized object indicating if the values are null
|
| See also
| --------
| notnull : boolean inverse of isnull
|
| iterkv(self, *args, **kwargs)
| iteritems alias used to get around 2to3. Deprecated
|
| keys(self)
| Get the 'info axis' (see Indexing for more)
|
| This is index for Series, columns for DataFrame and major_axis for
| Panel.
|
| last(self, offset)
| Convenience method for subsetting final periods of time series data
| based on a date offset
|
| Parameters
| ----------
| offset : string, DateOffset, dateutil.relativedelta
|
| Examples
| --------
| ts.last('5M') -> Last 5 months
|
| Returns
| -------
| subset : type of caller
|
| load(self, path)
| Deprecated. Use read_pickle instead.
|
| mask(self, cond)
| Returns copy whose values are replaced with nan if the
| inverted condition is True
|
| Parameters
| ----------
| cond : boolean NDFrame or array
|
| Returns
| -------
| wh: same as input
|
| notnull(self)
| Return a boolean same-sized object indicating if the values are
| not null
|
| See also
| --------
| isnull : boolean inverse of notnull
|
| pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, **kwargs)
| Percent change over given number of periods.
|
| Parameters
| ----------
| periods : int, default 1
| Periods to shift for forming percent change
| fill_method : str, default 'pad'
| How to handle NAs before computing percent changes
| limit : int, default None
| The number of consecutive NAs to fill before stopping
| freq : DateOffset, timedelta, or offset alias string, optional
| Increment to use from time series API (e.g. 'M' or BDay())
|
| Returns
| -------
| chg : NDFrame
|
| Notes
| -----
|
| By default, the percentage change is calculated along the stat
| axis: 0, or ``Index``, for ``DataFrame`` and 1, or ``minor`` for
| ``Panel``. You can change this with the ``axis`` keyword argument.
|
| pop(self, item)
| Return item and drop from frame. Raise KeyError if not found.
|
| reindex_like(self, other, method=None, copy=True, limit=None)
| return an object with matching indicies to myself
|
| Parameters
| ----------
| other : Object
| method : string or None
| copy : boolean, default True
| limit : int, default None
| Maximum size gap to forward or backward fill
|
| Notes
| -----
| Like calling s.reindex(index=other.index, columns=other.columns,
| method=...)
|
| Returns
| -------
| reindexed : same as input
|
| rename_axis(self, mapper, axis=0, copy=True, inplace=False)
| Alter index and / or columns using input function or functions.
| Function / dict values must be unique (1-to-1). Labels not contained in
| a dict / Series will be left as-is.
|
| Parameters
| ----------
| mapper : dict-like or function, optional
| axis : int or string, default 0
| copy : boolean, default True
| Also copy underlying data
| inplace : boolean, default False
|
| Returns
| -------
| renamed : type of caller
|
| replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None)
| Replace values given in 'to_replace' with 'value'.
|
| Parameters
| ----------
| to_replace : str, regex, list, dict, Series, numeric, or None
|
| * str or regex:
|
| - str: string exactly matching `to_replace` will be replaced
| with `value`
| - regex: regexs matching `to_replace` will be replaced with
| `value`
|
| * list of str, regex, or numeric:
|
| - First, if `to_replace` and `value` are both lists, they
| **must** be the same length.
| - Second, if ``regex=True`` then all of the strings in **both**
| lists will be interpreted as regexs otherwise they will match
| directly. This doesn't matter much for `value` since there
| are only a few possible substitution regexes you can use.
| - str and regex rules apply as above.
|
| * dict:
|
| - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as
| follows: look in column 'a' for the value 'b' and replace it
| with nan. You can nest regular expressions as well. Note that
| column names (the top-level dictionary keys in a nested
| dictionary) **cannot** be regular expressions.
| - Keys map to column names and values map to substitution
| values. You can treat this as a special case of passing two
| lists except that you are specifying the column to search in.
|
| * None:
|
| - This means that the ``regex`` argument must be a string,
| compiled regular expression, or list, dict, ndarray or Series
| of such elements. If `value` is also ``None`` then this
| **must** be a nested dictionary or ``Series``.
|
| See the examples section for examples of each of these.
| value : scalar, dict, list, str, regex, default None
| Value to use to fill holes (e.g. 0), alternately a dict of values
| specifying which value to use for each column (columns not in the
| dict will not be filled). Regular expressions, strings and lists or
| dicts of such objects are also allowed.
| inplace : boolean, default False
| If True, in place. Note: this will modify any
| other views on this object (e.g. a column form a DataFrame).
| Returns the caller if this is True.
| limit : int, default None
| Maximum size gap to forward or backward fill
| regex : bool or same types as `to_replace`, default False
| Whether to interpret `to_replace` and/or `value` as regular
| expressions. If this is ``True`` then `to_replace` *must* be a
| string. Otherwise, `to_replace` must be ``None`` because this
| parameter will be interpreted as a regular expression or a list,
| dict, or array of regular expressions.
| method : string, optional, {'pad', 'ffill', 'bfill'}
| The method to use when for replacement, when ``to_replace`` is a
| ``list``.
|
| See also
| --------
| NDFrame.reindex
| NDFrame.asfreq
| NDFrame.fillna
|
| Returns
| -------
| filled : NDFrame
|
| Raises
| ------
| AssertionError
| * If `regex` is not a ``bool`` and `to_replace` is not ``None``.
| TypeError
| * If `to_replace` is a ``dict`` and `value` is not a ``list``,
| ``dict``, ``ndarray``, or ``Series``
| * If `to_replace` is ``None`` and `regex` is not compilable into a
| regular expression or is a list, dict, ndarray, or Series.
| ValueError
| * If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but
| they are not the same length.
|
| Notes
| -----
| * Regex substitution is performed under the hood with ``re.sub``. The
| rules for substitution for ``re.sub`` are the same.
| * Regular expressions will only substitute on strings, meaning you
| cannot provide, for example, a regular expression matching floating
| point numbers and expect the columns in your frame that have a
| numeric dtype to be matched. However, if those floating point numbers
| *are* strings, then you can do this.
| * This method has *a lot* of options. You are encouraged to experiment
| and play with this method to gain intuition about how it works.
|
| resample(self, rule, how=None, axis=0, fill_method=None, closed=None, label=None, convention='start', kind=None, loffset=None, limit=None, base=0)
| Convenience method for frequency conversion and resampling of regular
| time-series data.
|
| Parameters
| ----------
| rule : string
| the offset string or object representing target conversion
| how : string
| method for down- or re-sampling, default to 'mean' for
| downsampling
| axis : int, optional, default 0
| fill_method : string, default None
| fill_method for upsampling
| closed : {'right', 'left'}
| Which side of bin interval is closed
| label : {'right', 'left'}
| Which bin edge label to label bucket with
| convention : {'start', 'end', 's', 'e'}
| kind : "period"/"timestamp"
| loffset : timedelta
| Adjust the resampled time labels
| limit : int, default None
| Maximum size gap to when reindexing with fill_method
| base : int, default 0
| For frequencies that evenly subdivide 1 day, the "origin" of the
| aggregated intervals. For example, for '5min' frequency, base could
| range from 0 through 4. Defaults to 0
|
| save(self, path)
| Deprecated. Use to_pickle instead
|
| select(self, crit, axis=0)
| Return data corresponding to axis labels matching criteria
|
| Parameters
| ----------
| crit : function
| To be called on each index (label). Should return True or False
| axis : int
|
| Returns
| -------
| selection : type of caller
|
| set_axis(self, axis, labels)
| public verson of axis assignment
|
| shift(self, periods=1, freq=None, axis=0, **kwargs)
| Shift index by desired number of periods with an optional time freq
|
| Parameters
| ----------
| periods : int
| Number of periods to move, can be positive or negative
| freq : DateOffset, timedelta, or time rule string, optional
| Increment to use from datetools module or time rule (e.g. 'EOM').
| See Notes.
|
| Notes
| -----
| If freq is specified then the index values are shifted but the data
| is not realigned. That is, use freq if you would like to extend the
| index when shifting and preserve the original data.
|
| Returns
| -------
| shifted : same type as caller
|
| slice_shift(self, periods=1, axis=0)
| Equivalent to `shift` without copying data. The shifted data will
| not include the dropped periods and the shifted axis will be smaller
| than the original.
|
| Parameters
| ----------
| periods : int
| Number of periods to move, can be positive or negative
|
| Notes
| -----
| While the `slice_shift` is faster than `shift`, you may pay for it
| later during alignment.
|
| Returns
| -------
| shifted : same type as caller
|
| squeeze(self)
| squeeze length 1 dimensions
|
| swapaxes(self, axis1, axis2, copy=True)
| Interchange axes and swap values axes appropriately
|
| Returns
| -------
| y : same as input
|
| tail(self, n=5)
| Returns last n rows
|
| take(self, indices, axis=0, convert=True, is_copy=True)
| Analogous to ndarray.take
|
| Parameters
| ----------
| indices : list / array of ints
| axis : int, default 0
| convert : translate neg to pos indices (default)
| is_copy : mark the returned frame as a copy
|
| Returns
| -------
| taken : type of caller
|
| to_clipboard(self, excel=None, sep=None, **kwargs)
| Attempt to write text representation of object to the system clipboard
| This can be pasted into Excel, for example.
|
| Parameters
| ----------
| excel : boolean, defaults to True
| if True, use the provided separator, writing in a csv
| format for allowing easy pasting into excel.
| if False, write a string representation of the object
| to the clipboard
| sep : optional, defaults to tab
| other keywords are passed to to_csv
|
| Notes
| -----
| Requirements for your platform
| - Linux: xclip, or xsel (with gtk or PyQt4 modules)
| - Windows: none
| - OS X: none
|
| to_dense(self)
| Return dense representation of NDFrame (as opposed to sparse)
|
| to_hdf(self, path_or_buf, key, **kwargs)
| activate the HDFStore
|
| Parameters
| ----------
| path_or_buf : the path (string) or buffer to put the store
| key : string
| indentifier for the group in the store
| mode : optional, {'a', 'w', 'r', 'r+'}, default 'a'
|
| ``'r'``
| Read-only; no data can be modified.
| ``'w'``
| Write; a new file is created (an existing file with the same
| name would be deleted).
| ``'a'``
| Append; an existing file is opened for reading and writing,
| and if the file does not exist it is created.
| ``'r+'``
| It is similar to ``'a'``, but the file must already exist.
| format : 'fixed(f)|table(t)', default is 'fixed'
| fixed(f) : Fixed format
| Fast writing/reading. Not-appendable, nor searchable
| table(t) : Table format
| Write as a PyTables Table structure which may perform
| worse but allow more flexible operations like searching
| / selecting subsets of the data
| append : boolean, default False
| For Table formats, append the input data to the existing
| complevel : int, 1-9, default 0
| If a complib is specified compression will be applied
| where possible
| complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
| If complevel is > 0 apply compression to objects written
| in the store wherever possible
| fletcher32 : bool, default False
| If applying compression use the fletcher32 checksum
|
| to_json(self, path_or_buf=None, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', default_handler=None)
| Convert the object to a JSON string.
|
| Note NaN's and None will be converted to null and datetime objects
| will be converted to UNIX timestamps.
|
| Parameters
| ----------
| path_or_buf : the path or buffer to write the result string
| if this is None, return a StringIO of the converted string
| orient : string
|
| * Series
|
| - default is 'index'
| - allowed values are: {'split','records','index'}
|
| * DataFrame
|
| - default is 'columns'
| - allowed values are:
| {'split','records','index','columns','values'}
|
| * The format of the JSON string
|
| - split : dict like
| {index -> [index], columns -> [columns], data -> [values]}
| - records : list like
| [{column -> value}, ... , {column -> value}]
| - index : dict like {index -> {column -> value}}
| - columns : dict like {column -> {index -> value}}
| - values : just the values array
|
| date_format : {'epoch', 'iso'}
| Type of date conversion. `epoch` = epoch milliseconds,
| `iso`` = ISO8601, default is epoch.
| double_precision : The number of decimal places to use when encoding
| floating point values, default 10.
| force_ascii : force encoded string to be ASCII, default True.
| date_unit : string, default 'ms' (milliseconds)
| The time unit to encode to, governs timestamp and ISO8601
| precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
| microsecond, and nanosecond respectively.
| default_handler : callable, default None
| Handler to call if object cannot otherwise be converted to a
| suitable format for JSON. Should receive a single argument which is
| the object to convert and return a serialisable object.
|
| Returns
| -------
| same type as input object with filtered info axis
|
| to_msgpack(self, path_or_buf=None, **kwargs)
| msgpack (serialize) object to input file path
|
| THIS IS AN EXPERIMENTAL LIBRARY and the storage format
| may not be stable until a future release.
|
| Parameters
| ----------
| path : string File path, buffer-like, or None
| if None, return generated string
| append : boolean whether to append to an existing msgpack
| (default is False)
| compress : type of compressor (zlib or blosc), default to None (no
| compression)
|
| to_pickle(self, path)
| Pickle (serialize) object to input file path
|
| Parameters
| ----------
| path : string
| File path
|
| to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None)
| Write records stored in a DataFrame to a SQL database.
|
| Parameters
| ----------
| name : string
| Name of SQL table
| con : SQLAlchemy engine or DBAPI2 connection (legacy mode)
| Using SQLAlchemy makes it possible to use any DB supported by that
| library.
| If a DBAPI2 object, only sqlite3 is supported.
| flavor : {'sqlite', 'mysql'}, default 'sqlite'
| The flavor of SQL to use. Ignored when using SQLAlchemy engine.
| 'mysql' is deprecated and will be removed in future versions, but it
| will be further supported through SQLAlchemy engines.
| schema : string, default None
| Specify the schema (if database flavor supports this). If None, use
| default schema.
| if_exists : {'fail', 'replace', 'append'}, default 'fail'
| - fail: If table exists, do nothing.
| - replace: If table exists, drop it, recreate it, and insert data.
| - append: If table exists, insert data. Create if does not exist.
| index : boolean, default True
| Write DataFrame index as a column.
| index_label : string or sequence, default None
| Column label for index column(s). If None is given (default) and
| `index` is True, then the index names are used.
| A sequence should be given if the DataFrame uses MultiIndex.
| chunksize : int, default None
| If not None, then rows will be written in batches of this size at a
| time. If None, all rows will be written at once.
| dtype : dict of column name to SQL type, default None
| Optional specifying the datatype for columns. The SQL type should
| be a SQLAlchemy type, or a string for sqlite3 fallback connection.
|
| truncate(self, before=None, after=None, axis=None, copy=True)
| Truncates a sorted NDFrame before and/or after some particular
| dates.
|
| Parameters
| ----------
| before : date
| Truncate before date
| after : date
| Truncate after date
| axis : the truncation axis, defaults to the stat axis
| copy : boolean, default is True,
| return a copy of the truncated section
|
| Returns
| -------
| truncated : type of caller
|
| tshift(self, periods=1, freq=None, axis=0, **kwargs)
| Shift the time index, using the index's frequency if available
|
| Parameters
| ----------
| periods : int
| Number of periods to move, can be positive or negative
| freq : DateOffset, timedelta, or time rule string, default None
| Increment to use from datetools module or time rule (e.g. 'EOM')
| axis : int or basestring
| Corresponds to the axis that contains the Index
|
| Notes
| -----
| If freq is not specified then tries to use the freq or inferred_freq
| attributes of the index. If neither of those attributes exist, a
| ValueError is thrown
|
| Returns
| -------
| shifted : NDFrame
|
| tz_convert(self, tz, axis=0, level=None, copy=True)
| Convert the axis to target time zone. If it is time zone naive, it
| will be localized to the passed time zone.
|
| Parameters
| ----------
| tz : string or pytz.timezone object
| axis : the axis to convert
| level : int, str, default None
| If axis ia a MultiIndex, convert a specific level. Otherwise
| must be None
| copy : boolean, default True
| Also make a copy of the underlying data
|
| Returns
| -------
|
| tz_localize(*args, **kwargs)
| Localize tz-naive TimeSeries to target time zone
|
| Parameters
| ----------
| tz : string or pytz.timezone object
| axis : the axis to localize
| level : int, str, default None
| If axis ia a MultiIndex, localize a specific level. Otherwise
| must be None
| copy : boolean, default True
| Also make a copy of the underlying data
| ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
| - 'infer' will attempt to infer fall dst-transition hours based on order
| - bool-ndarray where True signifies a DST time, False designates
| a non-DST time (note that this flag is only applicable for ambiguous times)
| - 'NaT' will return NaT where there are ambiguous times
| - 'raise' will raise an AmbiguousTimeError if there are ambiguous times
| infer_dst : boolean, default False (DEPRECATED)
| Attempt to infer fall dst-transition hours based on order
|
| Returns
| -------
|
| where(self, cond, other=nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True)
| Return an object of same shape as self and whose corresponding
| entries are from self where cond is True and otherwise are from other.
|
| Parameters
| ----------
| cond : boolean NDFrame or array
| other : scalar or NDFrame
| inplace : boolean, default False
| Whether to perform the operation in place on the data
| axis : alignment axis if needed, default None
| level : alignment level if needed, default None
| try_cast : boolean, default False
| try to cast the result back to the input type (if possible),
| raise_on_error : boolean, default True
| Whether to raise on invalid data types (e.g. trying to where on
| strings)
|
| Returns
| -------
| wh : same type as caller
|
| xs(self, key, axis=0, level=None, copy=None, drop_level=True)
| Returns a cross-section (row(s) or column(s)) from the Series/DataFrame.
| Defaults to cross-section on the rows (axis=0).
|
| Parameters
| ----------
| key : object
| Some label contained in the index, or partially in a MultiIndex
| axis : int, default 0
| Axis to retrieve cross-section on
| level : object, defaults to first n levels (n=1 or len(key))
| In case of a key partially contained in a MultiIndex, indicate
| which levels are used. Levels can be referred by label or position.
| copy : boolean [deprecated]
| Whether to make a copy of the data
| drop_level : boolean, default True
| If False, returns object with same levels as self.
|
| Examples
| --------
| >>> df
| A B C
| a 4 5 2
| b 4 0 9
| c 9 7 3
| >>> df.xs('a')
| A 4
| B 5
| C 2
| Name: a
| >>> df.xs('C', axis=1)
| a 2
| b 9
| c 3
| Name: C
|
| >>> df
| A B C D
| first second third
| bar one 1 4 1 8 9
| two 1 7 5 5 0
| baz one 1 6 6 8 0
| three 2 5 3 5 3
| >>> df.xs(('baz', 'three'))
| A B C D
| third
| 2 5 3 5 3
| >>> df.xs('one', level=1)
| A B C D
| first third
| bar 1 4 1 8 9
| baz 1 6 6 8 0
| >>> df.xs(('baz', 2), level=[0, 'third'])
| A B C D
| second
| three 5 3 5 3
|
| Returns
| -------
| xs : Series or DataFrame
|
| Notes
| -----
| xs is only for getting, not setting values.
|
| MultiIndex Slicers is a generic way to get/set values on any level or levels
| it is a superset of xs functionality, see :ref:`MultiIndex Slicers <advanced.mi_slicers>`
|
| ----------------------------------------------------------------------
| Data descriptors inherited from pandas.core.generic.NDFrame:
|
| at
| Fast label-based scalar accessor
|
| Similarly to ``loc``, ``at`` provides **label** based scalar lookups.
| You can also set using these indexers.
|
| blocks
| Internal property, property synonym for as_blocks()
|
| dtypes
| Return the dtypes in this object
|
| empty
| True if NDFrame is entirely empty [no items]
|
| ftypes
| Return the ftypes (indication of sparse/dense and dtype)
| in this object.
|
| iat
| Fast integer location scalar accessor.
|
| Similarly to ``iloc``, ``iat`` provides **integer** based lookups.
| You can also set using these indexers.
|
| iloc
| Purely integer-location based indexing for selection by position.
|
| ``.iloc[]`` is primarily integer position based (from ``0`` to
| ``length-1`` of the axis), but may also be used with a boolean
| array.
|
| Allowed inputs are:
|
| - An integer, e.g. ``5``.
| - A list or array of integers, e.g. ``[4, 3, 0]``.
| - A slice object with ints, e.g. ``1:7``.
| - A boolean array.
|
| ``.iloc`` will raise ``IndexError`` if a requested indexer is
| out-of-bounds, except *slice* indexers which allow out-of-bounds
| indexing (this conforms with python/numpy *slice* semantics).
|
| See more at :ref:`Selection by Position <indexing.integer>`
|
| ix
| A primarily label-location based indexer, with integer position
| fallback.
|
| ``.ix[]`` supports mixed integer and label based access. It is
| primarily label based, but will fall back to integer positional
| access unless the corresponding axis is of integer type.
|
| ``.ix`` is the most general indexer and will support any of the
| inputs in ``.loc`` and ``.iloc``. ``.ix`` also supports floating
| point label schemes. ``.ix`` is exceptionally useful when dealing
| with mixed positional and label based hierachical indexes.
|
| However, when an axis is integer based, ONLY label based access
| and not positional access is supported. Thus, in such cases, it's
| usually better to be explicit and use ``.iloc`` or ``.loc``.
|
| See more at :ref:`Advanced Indexing <advanced>`.
|
| loc
| Purely label-location based indexer for selection by label.
|
| ``.loc[]`` is primarily label based, but may also be used with a
| boolean array.
|
| Allowed inputs are:
|
| - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
| interpreted as a *label* of the index, and **never** as an
| integer position along the index).
| - A list or array of labels, e.g. ``['a', 'b', 'c']``.
| - A slice object with labels, e.g. ``'a':'f'`` (note that contrary
| to usual python slices, **both** the start and the stop are included!).
| - A boolean array.
|
| ``.loc`` will raise a ``KeyError`` when the items are not found.
|
| See more at :ref:`Selection by Label <indexing.label>`
|
| ndim
| Number of axes / array dimensions
|
| size
| number of elements in the NDFrame
|
| values
| Numpy representation of NDFrame
|
| Notes
| -----
| The dtype will be a lower-common-denominator dtype (implicit
| upcasting); that is to say if the dtypes (even of numeric types)
| are mixed, the one that accommodates all will be chosen. Use this
| with care if you are not dealing with the blocks.
|
| e.g. If the dtypes are float16 and float32, dtype will be upcast to
| float32. If dtypes are int32 and uint8, dtype will be upcase to
| int32.
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from pandas.core.generic.NDFrame:
|
| is_copy = None
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.base.PandasObject:
|
| __dir__(self)
| Provide method name lookup and completion
| Only provide 'public' methods
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.base.StringMixin:
|
| __bytes__(self)
| Return a string representation for a particular object.
|
| Invoked by bytes(obj) in py3 only.
| Yields a bytestring in both py2/py3.
|
| __repr__(self)
| Return a string representation for a particular object.
|
| Yields Bytestring in Py2, Unicode String in py3.
|
| __str__(self)
| Return a string representation for a particular Object
|
| Invoked by str(df) in both py2/py3.
| Yields Bytestring in Py2, Unicode String in py3.
|
| ----------------------------------------------------------------------
| Data descriptors inherited from pandas.core.base.StringMixin:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
In [ ]:
Content source: rbiswas4/Cadence_rutgersStudies
Similar notebooks: