In [1]:
import pandas as pd
In [3]:
# Print the full class documentation for pandas.Series to stdout.
# NOTE(review): this dumps well over a thousand lines of text into the notebook
# output, which bloats the file and buries the narrative. In an interactive
# session, `pd.Series?` (pager) or the online pandas API reference is usually
# preferable; consider clearing this cell's output before sharing.
help(pd.Series)
Help on class Series in module pandas.core.series:
class Series(pandas.core.base.IndexOpsMixin, pandas.core.strings.StringAccessorMixin, pandas.core.generic.NDFrame)
| One-dimensional ndarray with axis labels (including time series).
|
| Labels need not be unique but must be any hashable type. The object
| supports both integer- and label-based indexing and provides a host of
| methods for performing operations involving the index. Statistical
| methods from ndarray have been overridden to automatically exclude
| missing data (currently represented as NaN)
|
| Operations between Series (+, -, /, *, **) align values based on their
| associated index values-- they need not be the same length. The result
| index will be the sorted union of the two indexes.
|
| Parameters
| ----------
| data : array-like, dict, or scalar value
| Contains data stored in Series
| index : array-like or Index (1d)
| Values must be unique and hashable, same length as data. Index
| object (or other iterable of same length as data) Will default to
| RangeIndex(len(data)) if not provided. If both a dict and index
| sequence are used, the index will override the keys found in the
| dict.
| dtype : numpy.dtype or None
| If None, dtype will be inferred
| copy : boolean, default False
| Copy input data
|
| Method resolution order:
| Series
| pandas.core.base.IndexOpsMixin
| pandas.core.strings.StringAccessorMixin
| pandas.core.generic.NDFrame
| pandas.core.base.PandasObject
| pandas.core.base.StringMixin
| builtins.object
|
| Methods defined here:
|
| __add__ = wrapper(left, right, name='__add__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d0882f0>)
|
| __and__ = wrapper(self, other)
|
| __array__(self, result=None)
| the array interface, return my values
|
| __array_prepare__(self, result, context=None)
| Gets called prior to a ufunc
|
| __array_wrap__(self, result, context=None)
| Gets called after a ufunc
|
| __div__ = wrapper(left, right, name='__truediv__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d0889d8>)
|
| __divmod__ = wrapper(left, right, name='__divmod__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08cbf8>)
|
| __eq__ = wrapper(self, other, axis=None)
|
| __float__ = wrapper(self)
|
| __floordiv__ = wrapper(left, right, name='__floordiv__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d088b70>)
|
| __ge__ = wrapper(self, other, axis=None)
|
| __getitem__(self, key)
|
| __gt__ = wrapper(self, other, axis=None)
|
| __iadd__ = f(self, other)
|
| __imul__ = f(self, other)
|
| __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)
| Initialize self. See help(type(self)) for accurate signature.
|
| __int__ = wrapper(self)
|
| __ipow__ = f(self, other)
|
| __isub__ = f(self, other)
|
| __iter__(self)
| provide iteration over the values of the Series
| box values if necessary
|
| __itruediv__ = f(self, other)
|
| __le__ = wrapper(self, other, axis=None)
|
| __len__(self)
| return the length of the Series
|
| __long__ = wrapper(self)
|
| __lt__ = wrapper(self, other, axis=None)
|
| __mod__ = wrapper(left, right, name='__mod__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d088d08>)
|
| __mul__ = wrapper(left, right, name='__mul__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d088840>)
|
| __ne__ = wrapper(self, other, axis=None)
|
| __or__ = wrapper(self, other)
|
| __pow__ = wrapper(left, right, name='__pow__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d088ea0>)
|
| __radd__ = wrapper(left, right, name='__radd__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d088510>)
|
| __rand__ = wrapper(self, other)
|
| __rdiv__ = wrapper(left, right, name='__rtruediv__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08a510>)
|
| __rfloordiv__ = wrapper(left, right, name='__rfloordiv__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08a730>)
|
| __rmod__ = wrapper(left, right, name='__rmod__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08ab70>)
|
| __rmul__ = wrapper(left, right, name='__rmul__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08a0d0>)
|
| __ror__ = wrapper(self, other)
|
| __rpow__ = wrapper(left, right, name='__rpow__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08a950>)
|
| __rsub__ = wrapper(left, right, name='__rsub__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08a2f0>)
|
| __rtruediv__ = wrapper(left, right, name='__rtruediv__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d08a510>)
|
| __rxor__ = wrapper(self, other)
|
| __setitem__(self, key, value)
|
| __sub__ = wrapper(left, right, name='__sub__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d0886a8>)
|
| __truediv__ = wrapper(left, right, name='__truediv__', na_op=<function _arith_method_SERIES.<locals>.na_op at 0x7ff61d0889d8>)
|
| __unicode__(self)
| Return a string representation for a particular DataFrame
|
| Invoked by unicode(df) in py2 only. Yields a Unicode String in both
| py2/py3.
|
| __xor__ = wrapper(self, other)
|
| add(self, other, level=None, fill_value=None, axis=0)
| Addition of series and other, element-wise (binary operator `add`).
|
| Equivalent to ``series + other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.radd
|
| align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None)
 |      Align two objects on their axes with the
 |      specified join method for each axis Index
|
| Parameters
| ----------
| other : DataFrame or Series
| join : {'outer', 'inner', 'left', 'right'}, default 'outer'
| axis : allowed axis of the other object, default None
| Align on index (0), columns (1), or both (None)
| level : int or level name, default None
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
| copy : boolean, default True
| Always returns new objects. If copy=False and no reindexing is
| required then original objects are returned.
| fill_value : scalar, default np.NaN
| Value to use for missing values. Defaults to NaN, but can be any
| "compatible" value
| method : str, default None
| limit : int, default None
| fill_axis : {0, 'index'}, default 0
| Filling axis, method and limit
| broadcast_axis : {0, 'index'}, default None
| Broadcast values along this axis, if aligning two objects of
| different dimensions
|
| .. versionadded:: 0.17.0
|
| Returns
| -------
| (left, right) : (Series, type of other)
| Aligned objects
|
| all(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs)
| Return whether all elements are True over requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| bool_only : boolean, default None
| Include only boolean columns. If None, will attempt to use everything,
| then use only boolean data. Not implemented for Series.
|
| Returns
| -------
| all : scalar or Series (if level specified)
|
| any(self, axis=None, bool_only=None, skipna=None, level=None, **kwargs)
| Return whether any element is True over requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| bool_only : boolean, default None
| Include only boolean columns. If None, will attempt to use everything,
| then use only boolean data. Not implemented for Series.
|
| Returns
| -------
| any : scalar or Series (if level specified)
|
| append(self, to_append, ignore_index=False, verify_integrity=False)
| Concatenate two or more Series.
|
| Parameters
| ----------
| to_append : Series or list/tuple of Series
| ignore_index : boolean, default False
| If True, do not use the index labels.
|
 |          .. versionadded:: 0.19.0
|
| verify_integrity : boolean, default False
| If True, raise Exception on creating index with duplicates
|
| Returns
| -------
| appended : Series
|
| Examples
| --------
| >>> s1 = pd.Series([1, 2, 3])
| >>> s2 = pd.Series([4, 5, 6])
| >>> s3 = pd.Series([4, 5, 6], index=[3,4,5])
| >>> s1.append(s2)
| 0 1
| 1 2
| 2 3
| 0 4
| 1 5
| 2 6
| dtype: int64
|
| >>> s1.append(s3)
| 0 1
| 1 2
| 2 3
| 3 4
| 4 5
| 5 6
| dtype: int64
|
| With `ignore_index` set to True:
|
| >>> s1.append(s2, ignore_index=True)
| 0 1
| 1 2
| 2 3
| 3 4
| 4 5
| 5 6
| dtype: int64
|
| With `verify_integrity` set to True:
|
| >>> s1.append(s2, verify_integrity=True)
| ValueError: Indexes have overlapping values: [0, 1, 2]
|
| apply(self, func, convert_dtype=True, args=(), **kwds)
| Invoke function on values of Series. Can be ufunc (a NumPy function
| that applies to the entire Series) or a Python function that only works
| on single values
|
| Parameters
| ----------
| func : function
| convert_dtype : boolean, default True
| Try to find better dtype for elementwise function results. If
| False, leave as dtype=object
| args : tuple
| Positional arguments to pass to function in addition to the value
| Additional keyword arguments will be passed as keywords to the function
|
| Returns
| -------
| y : Series or DataFrame if func returns a Series
|
| See also
| --------
| Series.map: For element-wise operations
|
| Examples
| --------
|
| Create a series with typical summer temperatures for each city.
|
| >>> import pandas as pd
| >>> import numpy as np
| >>> series = pd.Series([20, 21, 12], index=['London',
| ... 'New York','Helsinki'])
| London 20
| New York 21
| Helsinki 12
| dtype: int64
|
| Square the values by defining a function and passing it as an
| argument to ``apply()``.
|
| >>> def square(x):
| ... return x**2
| >>> series.apply(square)
| London 400
| New York 441
| Helsinki 144
| dtype: int64
|
| Square the values by passing an anonymous function as an
| argument to ``apply()``.
|
| >>> series.apply(lambda x: x**2)
| London 400
| New York 441
| Helsinki 144
| dtype: int64
|
| Define a custom function that needs additional positional
| arguments and pass these additional arguments using the
| ``args`` keyword.
|
| >>> def subtract_custom_value(x, custom_value):
| ... return x-custom_value
|
| >>> series.apply(subtract_custom_value, args=(5,))
| London 15
| New York 16
| Helsinki 7
| dtype: int64
|
| Define a custom function that takes keyword arguments
| and pass these arguments to ``apply``.
|
| >>> def add_custom_values(x, **kwargs):
| ... for month in kwargs:
| ... x+=kwargs[month]
| ... return x
|
| >>> series.apply(add_custom_values, june=30, july=20, august=25)
| London 95
| New York 96
| Helsinki 87
| dtype: int64
|
| Use a function from the Numpy library.
|
| >>> series.apply(np.log)
| London 2.995732
| New York 3.044522
| Helsinki 2.484907
| dtype: float64
|
| argmax = idxmax(self, axis=None, skipna=True, *args, **kwargs)
|
| argmin = idxmin(self, axis=None, skipna=True, *args, **kwargs)
|
| argsort(self, axis=0, kind='quicksort', order=None)
| Overrides ndarray.argsort. Argsorts the value, omitting NA/null values,
| and places the result in the same locations as the non-NA values
|
| Parameters
| ----------
| axis : int (can only be zero)
| kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
| Choice of sorting algorithm. See np.sort for more
| information. 'mergesort' is the only stable algorithm
| order : ignored
|
| Returns
| -------
| argsorted : Series, with -1 indicated where nan values are present
|
| See also
| --------
| numpy.ndarray.argsort
|
| autocorr(self, lag=1)
| Lag-N autocorrelation
|
| Parameters
| ----------
| lag : int, default 1
| Number of lags to apply before performing autocorrelation.
|
| Returns
| -------
| autocorr : float
|
| between(self, left, right, inclusive=True)
| Return boolean Series equivalent to left <= series <= right. NA values
| will be treated as False
|
| Parameters
| ----------
| left : scalar
| Left boundary
| right : scalar
| Right boundary
|
| Returns
| -------
| is_between : Series
|
| combine(self, other, func, fill_value=nan)
| Perform elementwise binary operation on two Series using given function
| with optional fill value when an index is missing from one Series or
| the other
|
| Parameters
| ----------
| other : Series or scalar value
| func : function
| fill_value : scalar value
|
| Returns
| -------
| result : Series
|
| combine_first(self, other)
| Combine Series values, choosing the calling Series's values
| first. Result index will be the union of the two indexes
|
| Parameters
| ----------
| other : Series
|
| Returns
| -------
| y : Series
|
| compound(self, axis=None, skipna=None, level=None)
| Return the compound percentage of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| compounded : scalar or Series (if level specified)
|
| compress(self, condition, *args, **kwargs)
| Return selected slices of an array along given axis as a Series
|
| See also
| --------
| numpy.ndarray.compress
|
| corr(self, other, method='pearson', min_periods=None)
| Compute correlation with `other` Series, excluding missing values
|
| Parameters
| ----------
| other : Series
| method : {'pearson', 'kendall', 'spearman'}
| * pearson : standard correlation coefficient
| * kendall : Kendall Tau correlation coefficient
| * spearman : Spearman rank correlation
| min_periods : int, optional
| Minimum number of observations needed to have a valid result
|
|
| Returns
| -------
| correlation : float
|
| count(self, level=None)
| Return number of non-NA/null observations in the Series
|
| Parameters
| ----------
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a smaller Series
|
| Returns
| -------
| nobs : int or Series (if level specified)
|
| cov(self, other, min_periods=None)
| Compute covariance with Series, excluding missing values
|
| Parameters
| ----------
| other : Series
| min_periods : int, optional
| Minimum number of observations needed to have a valid result
|
| Returns
| -------
| covariance : float
|
| Normalized by N-1 (unbiased estimator).
|
| cummax(self, axis=None, skipna=True, *args, **kwargs)
| Return cumulative max over requested axis.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| cummax : scalar
|
| cummin(self, axis=None, skipna=True, *args, **kwargs)
| Return cumulative minimum over requested axis.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| cummin : scalar
|
| cumprod(self, axis=None, skipna=True, *args, **kwargs)
| Return cumulative product over requested axis.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| cumprod : scalar
|
| cumsum(self, axis=None, skipna=True, *args, **kwargs)
| Return cumulative sum over requested axis.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
|
| Returns
| -------
| cumsum : scalar
|
| diff(self, periods=1)
| 1st discrete difference of object
|
| Parameters
| ----------
| periods : int, default 1
| Periods to shift for forming difference
|
| Returns
| -------
| diffed : Series
|
| div = truediv(self, other, level=None, fill_value=None, axis=0)
|
| divide = truediv(self, other, level=None, fill_value=None, axis=0)
|
| dot(self, other)
| Matrix multiplication with DataFrame or inner-product with Series
| objects
|
| Parameters
| ----------
| other : Series or DataFrame
|
| Returns
| -------
| dot_product : scalar or Series
|
| drop_duplicates(self, keep='first', inplace=False)
| Return Series with duplicate values removed
|
| Parameters
| ----------
|
| keep : {'first', 'last', False}, default 'first'
| - ``first`` : Drop duplicates except for the first occurrence.
| - ``last`` : Drop duplicates except for the last occurrence.
| - False : Drop all duplicates.
| take_last : deprecated
| inplace : boolean, default False
| If True, performs operation inplace and returns None.
|
| Returns
| -------
| deduplicated : Series
|
| dropna(self, axis=0, inplace=False, **kwargs)
| Return Series without null values
|
| Returns
| -------
| valid : Series
| inplace : boolean, default False
| Do operation in place.
|
| duplicated(self, keep='first')
| Return boolean Series denoting duplicate values
|
| Parameters
| ----------
| keep : {'first', 'last', False}, default 'first'
| - ``first`` : Mark duplicates as ``True`` except for the first
| occurrence.
| - ``last`` : Mark duplicates as ``True`` except for the last
| occurrence.
| - False : Mark all duplicates as ``True``.
| take_last : deprecated
|
| Returns
| -------
| duplicated : Series
|
| eq(self, other, level=None, fill_value=None, axis=0)
| Equal to of series and other, element-wise (binary operator `eq`).
|
| Equivalent to ``series == other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.None
|
| ewm(self, com=None, span=None, halflife=None, alpha=None, min_periods=0, freq=None, adjust=True, ignore_na=False, axis=0)
| Provides exponential weighted functions
|
| .. versionadded:: 0.18.0
|
| Parameters
| ----------
| com : float, optional
| Specify decay in terms of center of mass,
| :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`
| span : float, optional
| Specify decay in terms of span,
| :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`
| halflife : float, optional
| Specify decay in terms of half-life,
| :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`
| alpha : float, optional
| Specify smoothing factor :math:`\alpha` directly,
| :math:`0 < \alpha \leq 1`
|
| .. versionadded:: 0.18.0
|
| min_periods : int, default 0
| Minimum number of observations in window required to have a value
| (otherwise result is NA).
| freq : None or string alias / date offset object, default=None (DEPRECATED)
| Frequency to conform to before computing statistic
| adjust : boolean, default True
| Divide by decaying adjustment factor in beginning periods to account
| for imbalance in relative weightings (viewing EWMA as a moving average)
| ignore_na : boolean, default False
| Ignore missing values when calculating weights;
| specify True to reproduce pre-0.15.0 behavior
|
| Returns
| -------
| a Window sub-classed for the particular operation
|
| Examples
| --------
|
| >>> df = DataFrame({'B': [0, 1, 2, np.nan, 4]})
| B
| 0 0.0
| 1 1.0
| 2 2.0
| 3 NaN
| 4 4.0
|
| >>> df.ewm(com=0.5).mean()
| B
| 0 0.000000
| 1 0.750000
| 2 1.615385
| 3 1.615385
| 4 3.670213
|
| Notes
| -----
| Exactly one of center of mass, span, half-life, and alpha must be provided.
| Allowed values and relationship between the parameters are specified in the
| parameter descriptions above; see the link at the end of this section for
| a detailed explanation.
|
| The `freq` keyword is used to conform time series data to a specified
| frequency by resampling the data. This is done with the default parameters
| of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
|
| When adjust is True (default), weighted averages are calculated using
| weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1.
|
| When adjust is False, weighted averages are calculated recursively as:
| weighted_average[0] = arg[0];
| weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i].
|
| When ignore_na is False (default), weights are based on absolute positions.
| For example, the weights of x and y used in calculating the final weighted
| average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and
| (1-alpha)**2 and alpha (if adjust is False).
|
| When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based
| on relative positions. For example, the weights of x and y used in
| calculating the final weighted average of [x, None, y] are 1-alpha and 1
| (if adjust is True), and 1-alpha and alpha (if adjust is False).
|
| More details can be found at
| http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
|
| expanding(self, min_periods=1, freq=None, center=False, axis=0)
| Provides expanding transformations.
|
| .. versionadded:: 0.18.0
|
| Parameters
| ----------
 |      min_periods : int, default 1
| Minimum number of observations in window required to have a value
| (otherwise result is NA).
| freq : string or DateOffset object, optional (default None) (DEPRECATED)
| Frequency to conform the data to before computing the statistic.
| Specified as a frequency string or DateOffset object.
| center : boolean, default False
| Set the labels at the center of the window.
| axis : int or string, default 0
|
| Returns
| -------
| a Window sub-classed for the particular operation
|
| Examples
| --------
|
| >>> df = DataFrame({'B': [0, 1, 2, np.nan, 4]})
| B
| 0 0.0
| 1 1.0
| 2 2.0
| 3 NaN
| 4 4.0
|
| >>> df.expanding(2).sum()
| B
| 0 NaN
| 1 1.0
| 2 3.0
| 3 3.0
| 4 7.0
|
| Notes
| -----
| By default, the result is set to the right edge of the window. This can be
| changed to the center of the window by setting ``center=True``.
|
| The `freq` keyword is used to conform time series data to a specified
| frequency by resampling the data. This is done with the default parameters
| of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
|
| fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs)
| Fill NA/NaN values using the specified method
|
| Parameters
| ----------
| value : scalar, dict, Series, or DataFrame
| Value to use to fill holes (e.g. 0), alternately a
| dict/Series/DataFrame of values specifying which value to use for
| each index (for a Series) or column (for a DataFrame). (values not
| in the dict/Series/DataFrame will not be filled). This value cannot
| be a list.
| method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
| Method to use for filling holes in reindexed Series
| pad / ffill: propagate last valid observation forward to next valid
| backfill / bfill: use NEXT valid observation to fill gap
| axis : {0, 'index'}
| inplace : boolean, default False
| If True, fill in place. Note: this will modify any
| other views on this object, (e.g. a no-copy slice for a column in a
| DataFrame).
| limit : int, default None
| If method is specified, this is the maximum number of consecutive
| NaN values to forward/backward fill. In other words, if there is
| a gap with more than this number of consecutive NaNs, it will only
| be partially filled. If method is not specified, this is the
| maximum number of entries along the entire axis where NaNs will be
| filled.
| downcast : dict, default is None
| a dict of item->dtype of what to downcast if possible,
| or the string 'infer' which will try to downcast to an appropriate
| equal type (e.g. float64 to int64 if possible)
|
| See Also
| --------
| reindex, asfreq
|
| Returns
| -------
| filled : Series
|
| first_valid_index(self)
| Return label for first non-NA/null value
|
| floordiv(self, other, level=None, fill_value=None, axis=0)
| Integer division of series and other, element-wise (binary operator `floordiv`).
|
| Equivalent to ``series // other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.rfloordiv
|
| ge(self, other, level=None, fill_value=None, axis=0)
| Greater than or equal to of series and other, element-wise (binary operator `ge`).
|
| Equivalent to ``series >= other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.None
|
| get_value(self, label, takeable=False)
| Quickly retrieve single value at passed index label
|
| Parameters
| ----------
| index : label
| takeable : interpret the index as indexers, default False
|
| Returns
| -------
| value : scalar value
|
| get_values(self)
| same as values (but handles sparseness conversions); is a view
|
| gt(self, other, level=None, fill_value=None, axis=0)
| Greater than of series and other, element-wise (binary operator `gt`).
|
| Equivalent to ``series > other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.None
|
| hist = hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, bins=10, **kwds)
| Draw histogram of the input series using matplotlib
|
| Parameters
| ----------
| by : object, optional
| If passed, then used to form histograms for separate groups
| ax : matplotlib axis object
| If not passed, uses gca()
| grid : boolean, default True
| Whether to show axis grid lines
| xlabelsize : int, default None
| If specified changes the x-axis label size
| xrot : float, default None
| rotation of x axis labels
| ylabelsize : int, default None
| If specified changes the y-axis label size
| yrot : float, default None
| rotation of y axis labels
| figsize : tuple, default None
| figure size in inches by default
| bins: integer, default 10
| Number of histogram bins to be used
| kwds : keywords
| To be passed to the actual plotting function
|
| Notes
| -----
| See matplotlib documentation online for more on this
|
| idxmax(self, axis=None, skipna=True, *args, **kwargs)
| Index of first occurrence of maximum of values.
|
| Parameters
| ----------
| skipna : boolean, default True
| Exclude NA/null values
|
| Returns
| -------
| idxmax : Index of maximum of values
|
| Notes
| -----
| This method is the Series version of ``ndarray.argmax``.
|
| See Also
| --------
| DataFrame.idxmax
| numpy.ndarray.argmax
|
| idxmin(self, axis=None, skipna=True, *args, **kwargs)
| Index of first occurrence of minimum of values.
|
| Parameters
| ----------
| skipna : boolean, default True
| Exclude NA/null values
|
| Returns
| -------
| idxmin : Index of minimum of values
|
| Notes
| -----
| This method is the Series version of ``ndarray.argmin``.
|
| See Also
| --------
| DataFrame.idxmin
| numpy.ndarray.argmin
|
| iget(self, i, axis=0)
| DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead
|
| iget_value(self, i, axis=0)
| DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead
|
| irow(self, i, axis=0)
| DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead
|
| isin(self, values)
| Return a boolean :class:`~pandas.Series` showing whether each element
| in the :class:`~pandas.Series` is exactly contained in the passed
| sequence of ``values``.
|
| Parameters
| ----------
| values : set or list-like
| The sequence of values to test. Passing in a single string will
| raise a ``TypeError``. Instead, turn a single string into a
| ``list`` of one element.
|
| .. versionadded:: 0.18.1
|
| Support for values as a set
|
| Returns
| -------
| isin : Series (bool dtype)
|
| Raises
| ------
| TypeError
| * If ``values`` is a string
|
| See Also
| --------
| pandas.DataFrame.isin
|
| Examples
| --------
|
| >>> s = pd.Series(list('abc'))
| >>> s.isin(['a', 'c', 'e'])
| 0 True
| 1 False
| 2 True
| dtype: bool
|
| Passing a single string as ``s.isin('a')`` will raise an error. Use
| a list of one element instead:
|
| >>> s.isin(['a'])
| 0 True
| 1 False
| 2 False
| dtype: bool
|
| items = iteritems(self)
|
| iteritems(self)
| Lazily iterate over (index, value) tuples
|
| keys(self)
| Alias for index
|
| kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return unbiased kurtosis over requested axis using Fisher's definition of
| kurtosis (kurtosis of normal == 0.0). Normalized by N-1
|
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| kurt : scalar or Series (if level specified)
|
| kurtosis = kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
|
| last_valid_index(self)
| Return label for last non-NA/null value
|
| le(self, other, level=None, fill_value=None, axis=0)
| Less than or equal to of series and other, element-wise (binary operator `le`).
|
| Equivalent to ``series <= other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.None
|
| lt(self, other, level=None, fill_value=None, axis=0)
| Less than of series and other, element-wise (binary operator `lt`).
|
| Equivalent to ``series < other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.None
|
| mad(self, axis=None, skipna=None, level=None)
| Return the mean absolute deviation of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| mad : scalar or Series (if level specified)
|
| map(self, arg, na_action=None)
| Map values of Series using input correspondence (which can be
| a dict, Series, or function)
|
| Parameters
| ----------
| arg : function, dict, or Series
| na_action : {None, 'ignore'}
| If 'ignore', propagate NA values, without passing them to the
| mapping function
|
| Returns
| -------
| y : Series
| same index as caller
|
| Examples
| --------
|
| Map inputs to outputs
|
| >>> x
| one 1
| two 2
| three 3
|
| >>> y
| 1 foo
| 2 bar
| 3 baz
|
| >>> x.map(y)
| one foo
| two bar
| three baz
|
| Use na_action to control whether NA values are affected by the mapping
| function.
|
| >>> s = pd.Series([1, 2, 3, np.nan])
|
| >>> s2 = s.map(lambda x: 'this is a string {}'.format(x),
| na_action=None)
| 0 this is a string 1.0
| 1 this is a string 2.0
| 2 this is a string 3.0
| 3 this is a string nan
| dtype: object
|
| >>> s3 = s.map(lambda x: 'this is a string {}'.format(x),
| na_action='ignore')
| 0 this is a string 1.0
| 1 this is a string 2.0
| 2 this is a string 3.0
| 3 NaN
| dtype: object
|
| max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| This method returns the maximum of the values in the object.
| If you want the *index* of the maximum, use ``idxmax``. This is
| the equivalent of the ``numpy.ndarray`` method ``argmax``.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| max : scalar or Series (if level specified)
|
| mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the mean of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| mean : scalar or Series (if level specified)
|
| median(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the median of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| median : scalar or Series (if level specified)
|
| memory_usage(self, index=True, deep=False)
| Memory usage of the Series
|
| Parameters
| ----------
| index : bool
| Specifies whether to include memory usage of Series index
| deep : bool
| Introspect the data deeply, interrogate
| `object` dtypes for system-level memory consumption
|
| Returns
| -------
| scalar bytes of memory consumed
|
| Notes
| -----
| Memory usage does not include memory consumed by elements that
| are not components of the array if deep=False
|
| See Also
| --------
| numpy.ndarray.nbytes
|
| min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| This method returns the minimum of the values in the object.
| If you want the *index* of the minimum, use ``idxmin``. This is
| the equivalent of the ``numpy.ndarray`` method ``argmin``.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| min : scalar or Series (if level specified)
|
| mod(self, other, level=None, fill_value=None, axis=0)
| Modulo of series and other, element-wise (binary operator `mod`).
|
| Equivalent to ``series % other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.rmod
|
| mode(self)
| Returns the mode(s) of the dataset.
|
| Empty if nothing occurs at least 2 times. Always returns Series even
| if only one value.
|
| Parameters
| ----------
| sort : bool, default True
| If True, will lexicographically sort values, if False skips
| sorting. Result ordering when ``sort=False`` is not defined.
|
| Returns
| -------
| modes : Series (sorted)
|
| mul(self, other, level=None, fill_value=None, axis=0)
| Multiplication of series and other, element-wise (binary operator `mul`).
|
| Equivalent to ``series * other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.rmul
|
| multiply = mul(self, other, level=None, fill_value=None, axis=0)
|
| ne(self, other, level=None, fill_value=None, axis=0)
| Not equal to of series and other, element-wise (binary operator `ne`).
|
| Equivalent to ``series != other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.None
|
| nlargest(self, n=5, keep='first')
| Return the largest `n` elements.
|
| Parameters
| ----------
| n : int
| Return this many descending sorted values
| keep : {'first', 'last', False}, default 'first'
| Where there are duplicate values:
| - ``first`` : take the first occurrence.
| - ``last`` : take the last occurrence.
| take_last : deprecated
|
| Returns
| -------
| top_n : Series
| The n largest values in the Series, in sorted order
|
| Notes
| -----
| Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
| relative to the size of the ``Series`` object.
|
| See Also
| --------
| Series.nsmallest
|
| Examples
| --------
| >>> import pandas as pd
| >>> import numpy as np
| >>> s = pd.Series(np.random.randn(1e6))
| >>> s.nlargest(10) # only sorts up to the N requested
|
| nonzero(self)
| Return the indices of the elements that are non-zero
|
| This method is equivalent to calling `numpy.nonzero` on the
| series data. For compatibility with NumPy, the return value is
| the same (a tuple with an array of indices for each dimension),
| but it will always be a one-item tuple because series only have
| one dimension.
|
| Examples
| --------
| >>> s = pd.Series([0, 3, 0, 4])
| >>> s.nonzero()
| (array([1, 3]),)
| >>> s.iloc[s.nonzero()[0]]
| 1 3
| 3 4
| dtype: int64
|
| See Also
| --------
| numpy.nonzero
|
| nsmallest(self, n=5, keep='first')
| Return the smallest `n` elements.
|
| Parameters
| ----------
| n : int
| Return this many ascending sorted values
| keep : {'first', 'last', False}, default 'first'
| Where there are duplicate values:
| - ``first`` : take the first occurrence.
| - ``last`` : take the last occurrence.
| take_last : deprecated
|
| Returns
| -------
| bottom_n : Series
| The n smallest values in the Series, in sorted order
|
| Notes
| -----
| Faster than ``.sort_values().head(n)`` for small `n` relative to
| the size of the ``Series`` object.
|
| See Also
| --------
| Series.nlargest
|
| Examples
| --------
| >>> import pandas as pd
| >>> import numpy as np
| >>> s = pd.Series(np.random.randn(1e6))
| >>> s.nsmallest(10) # only sorts up to the N requested
|
| order(self, na_last=None, ascending=True, kind='quicksort', na_position='last', inplace=False)
| DEPRECATED: use :meth:`Series.sort_values`
|
| Sorts Series object, by value, maintaining index-value link.
| This will return a new Series by default. Series.sort is the equivalent
| but as an inplace method.
|
| Parameters
| ----------
| na_last : boolean (optional, default=True)--DEPRECATED; use na_position
| Put NaN's at beginning or end
| ascending : boolean, default True
| Sort ascending. Passing False sorts descending
| kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
| Choice of sorting algorithm. See np.sort for more
| information. 'mergesort' is the only stable algorithm
| na_position : {'first', 'last'} (optional, default='last')
| 'first' puts NaNs at the beginning
| 'last' puts NaNs at the end
| inplace : boolean, default False
| Do operation in place.
|
| Returns
| -------
| y : Series
|
| See Also
| --------
| Series.sort_values
|
| pow(self, other, level=None, fill_value=None, axis=0)
| Exponential power of series and other, element-wise (binary operator `pow`).
|
| Equivalent to ``series ** other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.rpow
|
| prod(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the product of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| prod : scalar or Series (if level specified)
|
| product = prod(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
|
| ptp(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Returns the difference between the maximum value and the
| minimum value in the object. This is the equivalent of the
| ``numpy.ndarray`` method ``ptp``.
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| ptp : scalar or Series (if level specified)
|
| put(self, *args, **kwargs)
| Applies the `put` method to its `values` attribute
| if it has one.
|
| See also
| --------
| numpy.ndarray.put
|
| quantile(self, q=0.5, interpolation='linear')
| Return value at the given quantile, a la numpy.percentile.
|
| Parameters
| ----------
| q : float or array-like, default 0.5 (50% quantile)
| 0 <= q <= 1, the quantile(s) to compute
| interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
| .. versionadded:: 0.18.0
|
| This optional parameter specifies the interpolation method to use,
| when the desired quantile lies between two data points `i` and `j`:
|
| * linear: `i + (j - i) * fraction`, where `fraction` is the
| fractional part of the index surrounded by `i` and `j`.
| * lower: `i`.
| * higher: `j`.
| * nearest: `i` or `j` whichever is nearest.
| * midpoint: (`i` + `j`) / 2.
|
| Returns
| -------
| quantile : float or Series
| if ``q`` is an array, a Series will be returned where the
| index is ``q`` and the values are the quantiles.
|
| Examples
| --------
| >>> s = Series([1, 2, 3, 4])
| >>> s.quantile(.5)
| 2.5
| >>> s.quantile([.25, .5, .75])
| 0.25 1.75
| 0.50 2.50
| 0.75 3.25
| dtype: float64
|
| radd(self, other, level=None, fill_value=None, axis=0)
| Addition of series and other, element-wise (binary operator `radd`).
|
| Equivalent to ``other + series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.add
|
| ravel(self, order='C')
| Return the flattened underlying data as an ndarray
|
| See also
| --------
| numpy.ndarray.ravel
|
| rdiv = rtruediv(self, other, level=None, fill_value=None, axis=0)
|
| reindex(self, index=None, **kwargs)
| Conform Series to new index with optional filling logic, placing
| NA/NaN in locations having no value in the previous index. A new object
| is produced unless the new index is equivalent to the current one and
| copy=False
|
| Parameters
| ----------
| index : array-like, optional (can be specified in order, or as
| keywords)
| New labels / index to conform to. Preferably an Index object to
| avoid duplicating data
| method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
| method to use for filling holes in reindexed DataFrame.
| Please note: this is only applicable to DataFrames/Series with a
| monotonically increasing/decreasing index.
|
| * default: don't fill gaps
| * pad / ffill: propagate last valid observation forward to next
| valid
| * backfill / bfill: use next valid observation to fill gap
| * nearest: use nearest valid observations to fill gap
|
| copy : boolean, default True
| Return a new object, even if the passed indexes are the same
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
| fill_value : scalar, default np.NaN
| Value to use for missing values. Defaults to NaN, but can be any
| "compatible" value
| limit : int, default None
| Maximum number of consecutive elements to forward or backward fill
| tolerance : optional
| Maximum distance between original and new labels for inexact
| matches. The values of the index at the matching locations must
| satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
|
| .. versionadded:: 0.17.0
|
| Examples
| --------
|
| Create a dataframe with some fictional data.
|
| >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
| >>> df = pd.DataFrame({
| ... 'http_status': [200,200,404,404,301],
| ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]},
| ... index=index)
| >>> df
| http_status response_time
| Firefox 200 0.04
| Chrome 200 0.02
| Safari 404 0.07
| IE10 404 0.08
| Konqueror 301 1.00
|
| Create a new index and reindex the dataframe. By default
| values in the new index that do not have corresponding
| records in the dataframe are assigned ``NaN``.
|
| >>> new_index= ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
| ... 'Chrome']
| >>> df.reindex(new_index)
| http_status response_time
| Safari 404 0.07
| Iceweasel NaN NaN
| Comodo Dragon NaN NaN
| IE10 404 0.08
| Chrome 200 0.02
|
| We can fill in the missing values by passing a value to
| the keyword ``fill_value``. Because the index is not monotonically
| increasing or decreasing, we cannot use arguments to the keyword
| ``method`` to fill the ``NaN`` values.
|
| >>> df.reindex(new_index, fill_value=0)
| http_status response_time
| Safari 404 0.07
| Iceweasel 0 0.00
| Comodo Dragon 0 0.00
| IE10 404 0.08
| Chrome 200 0.02
|
| >>> df.reindex(new_index, fill_value='missing')
| http_status response_time
| Safari 404 0.07
| Iceweasel missing missing
| Comodo Dragon missing missing
| IE10 404 0.08
| Chrome 200 0.02
|
| To further illustrate the filling functionality in
| ``reindex``, we will create a dataframe with a
| monotonically increasing index (for example, a sequence
| of dates).
|
| >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')
| >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]},
| ... index=date_index)
| >>> df2
| prices
| 2010-01-01 100
| 2010-01-02 101
| 2010-01-03 NaN
| 2010-01-04 100
| 2010-01-05 89
| 2010-01-06 88
|
| Suppose we decide to expand the dataframe to cover a wider
| date range.
|
| >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
| >>> df2.reindex(date_index2)
| prices
| 2009-12-29 NaN
| 2009-12-30 NaN
| 2009-12-31 NaN
| 2010-01-01 100
| 2010-01-02 101
| 2010-01-03 NaN
| 2010-01-04 100
| 2010-01-05 89
| 2010-01-06 88
| 2010-01-07 NaN
|
| The index entries that did not have a value in the original data frame
| (for example, '2009-12-29') are by default filled with ``NaN``.
| If desired, we can fill in the missing values using one of several
| options.
|
| For example, to backpropagate the last valid value to fill the ``NaN``
| values, pass ``bfill`` as an argument to the ``method`` keyword.
|
| >>> df2.reindex(date_index2, method='bfill')
| prices
| 2009-12-29 100
| 2009-12-30 100
| 2009-12-31 100
| 2010-01-01 100
| 2010-01-02 101
| 2010-01-03 NaN
| 2010-01-04 100
| 2010-01-05 89
| 2010-01-06 88
| 2010-01-07 NaN
|
| Please note that the ``NaN`` value present in the original dataframe
| (at index value 2010-01-03) will not be filled by any of the
| value propagation schemes. This is because filling while reindexing
| does not look at dataframe values, but only compares the original and
| desired indexes. If you do want to fill in the ``NaN`` values present
| in the original dataframe, use the ``fillna()`` method.
|
| Returns
| -------
| reindexed : Series
|
| reindex_axis(self, labels, axis=0, **kwargs)
| for compatibility with higher dims
|
| rename(self, index=None, **kwargs)
| Alter axes input function or functions. Function / dict values must be
| unique (1-to-1). Labels not contained in a dict / Series will be left
| as-is. Extra labels listed don't throw an error. Alternatively, change
| ``Series.name`` with a scalar value (Series only).
|
| Parameters
| ----------
| index : scalar, list-like, dict-like or function, optional
| Scalar or list-like will alter the ``Series.name`` attribute,
| and raise on DataFrame or Panel.
| dict-like or functions are transformations to apply to
| that axis' values
| copy : boolean, default True
| Also copy underlying data
| inplace : boolean, default False
| Whether to return a new Series. If True then value of copy is
| ignored.
|
| Returns
| -------
| renamed : Series (new object)
|
| See Also
| --------
| pandas.NDFrame.rename_axis
|
| Examples
| --------
| >>> s = pd.Series([1, 2, 3])
| >>> s
| 0 1
| 1 2
| 2 3
| dtype: int64
| >>> s.rename("my_name") # scalar, changes Series.name
| 0 1
| 1 2
| 2 3
| Name: my_name, dtype: int64
| >>> s.rename(lambda x: x ** 2) # function, changes labels
| 0 1
| 1 2
| 4 3
| dtype: int64
| >>> s.rename({1: 3, 2: 5}) # mapping, changes labels
| 0 1
| 3 2
| 5 3
| dtype: int64
| >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
| >>> df.rename(2)
| ...
| TypeError: 'int' object is not callable
| >>> df.rename(index=str, columns={"A": "a", "B": "c"})
| a c
| 0 1 4
| 1 2 5
| 2 3 6
| >>> df.rename(index=str, columns={"A": "a", "C": "c"})
| a B
| 0 1 4
| 1 2 5
| 2 3 6
|
| reorder_levels(self, order)
| Rearrange index levels using input order. May not drop or duplicate
| levels
|
| Parameters
| ----------
| order: list of int representing new level order.
| (reference level by number or key)
| axis: where to reorder levels
|
| Returns
| -------
| type of caller (new object)
|
| repeat(self, reps, *args, **kwargs)
| Repeat elements of an Series. Refer to `numpy.ndarray.repeat`
| for more information about the `reps` argument.
|
| See also
| --------
| numpy.ndarray.repeat
|
| reset_index(self, level=None, drop=False, name=None, inplace=False)
| Analogous to the :meth:`pandas.DataFrame.reset_index` function, see
| docstring there.
|
| Parameters
| ----------
| level : int, str, tuple, or list, default None
| Only remove the given levels from the index. Removes all levels by
| default
| drop : boolean, default False
| Do not try to insert index into dataframe columns
| name : object, default None
| The name of the column corresponding to the Series values
| inplace : boolean, default False
| Modify the Series in place (do not create a new object)
|
| Returns
| ----------
| resetted : DataFrame, or Series if drop == True
|
| reshape(self, *args, **kwargs)
| DEPRECATED: calling this method will raise an error in a
| future release. Please call ``.values.reshape(...)`` instead.
|
| return an ndarray with the values shape
| if the specified shape matches exactly the current shape, then
| return self (for compat)
|
| See also
| --------
| numpy.ndarray.reshape
|
| rfloordiv(self, other, level=None, fill_value=None, axis=0)
| Integer division of series and other, element-wise (binary operator `rfloordiv`).
|
| Equivalent to ``other // series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.floordiv
|
| rmod(self, other, level=None, fill_value=None, axis=0)
| Modulo of series and other, element-wise (binary operator `rmod`).
|
| Equivalent to ``other % series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.mod
|
| rmul(self, other, level=None, fill_value=None, axis=0)
| Multiplication of series and other, element-wise (binary operator `rmul`).
|
| Equivalent to ``other * series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.mul
|
| rolling(self, window, min_periods=None, freq=None, center=False, win_type=None, on=None, axis=0)
| Provides rolling window calculations.
|
| .. versionadded:: 0.18.0
|
| Parameters
| ----------
| window : int, or offset
| Size of the moving window. This is the number of observations used for
| calculating the statistic. Each window will be a fixed size.
|
| If it's an offset then this will be the time period of each window. Each
| window will be a variable size based on the observations included in
| the time-period. This is only valid for datetimelike indexes. This is
| new in 0.19.0
| min_periods : int, default None
| Minimum number of observations in window required to have a value
| (otherwise result is NA). For a window that is specified by an offset,
| this will default to 1.
| freq : string or DateOffset object, optional (default None) (DEPRECATED)
| Frequency to conform the data to before computing the statistic.
| Specified as a frequency string or DateOffset object.
| center : boolean, default False
| Set the labels at the center of the window.
| win_type : string, default None
| Provide a window type. See the notes below.
| on : string, optional
| For a DataFrame, column on which to calculate
| the rolling window, rather than the index
|
| .. versionadded:: 0.19.0
|
| axis : int or string, default 0
|
| Returns
| -------
| a Window or Rolling sub-classed for the particular operation
|
| Examples
| --------
|
| >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
| >>> df
| B
| 0 0.0
| 1 1.0
| 2 2.0
| 3 NaN
| 4 4.0
|
| Rolling sum with a window length of 2, using the 'triang'
| window type.
|
| >>> df.rolling(2, win_type='triang').sum()
| B
| 0 NaN
| 1 1.0
| 2 2.5
| 3 NaN
| 4 NaN
|
| Rolling sum with a window length of 2, min_periods defaults
| to the window length.
|
| >>> df.rolling(2).sum()
| B
| 0 NaN
| 1 1.0
| 2 3.0
| 3 NaN
| 4 NaN
|
| Same as above, but explicitly set the min_periods
|
| >>> df.rolling(2, min_periods=1).sum()
| B
| 0 0.0
| 1 1.0
| 2 3.0
| 3 2.0
| 4 4.0
|
| A ragged (meaning not-a-regular frequency), time-indexed DataFrame
|
| >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
| ....: index = [pd.Timestamp('20130101 09:00:00'),
| ....: pd.Timestamp('20130101 09:00:02'),
| ....: pd.Timestamp('20130101 09:00:03'),
| ....: pd.Timestamp('20130101 09:00:05'),
| ....: pd.Timestamp('20130101 09:00:06')])
|
| >>> df
| B
| 2013-01-01 09:00:00 0.0
| 2013-01-01 09:00:02 1.0
| 2013-01-01 09:00:03 2.0
| 2013-01-01 09:00:05 NaN
| 2013-01-01 09:00:06 4.0
|
|
| Contrasting to an integer rolling window, this will roll a variable
| length window corresponding to the time period.
| The default for min_periods is 1.
|
| >>> df.rolling('2s').sum()
| B
| 2013-01-01 09:00:00 0.0
| 2013-01-01 09:00:02 1.0
| 2013-01-01 09:00:03 3.0
| 2013-01-01 09:00:05 NaN
| 2013-01-01 09:00:06 4.0
|
| Notes
| -----
| By default, the result is set to the right edge of the window. This can be
| changed to the center of the window by setting ``center=True``.
|
| The `freq` keyword is used to conform time series data to a specified
| frequency by resampling the data. This is done with the default parameters
| of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
|
| To learn more about the offsets & frequency strings, please see `this link
| <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
|
| The recognized win_types are:
|
| * ``boxcar``
| * ``triang``
| * ``blackman``
| * ``hamming``
| * ``bartlett``
| * ``parzen``
| * ``bohman``
| * ``blackmanharris``
| * ``nuttall``
| * ``barthann``
| * ``kaiser`` (needs beta)
| * ``gaussian`` (needs std)
| * ``general_gaussian`` (needs power, width)
| * ``slepian`` (needs width).
|
| round(self, decimals=0, *args, **kwargs)
| Round each value in a Series to the given number of decimals.
|
| Parameters
| ----------
| decimals : int
| Number of decimal places to round to (default: 0).
| If decimals is negative, it specifies the number of
| positions to the left of the decimal point.
|
| Returns
| -------
| Series object
|
| See Also
| --------
| numpy.around
| DataFrame.round
|
| rpow(self, other, level=None, fill_value=None, axis=0)
| Exponential power of series and other, element-wise (binary operator `rpow`).
|
| Equivalent to ``other ** series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.pow
|
| rsub(self, other, level=None, fill_value=None, axis=0)
| Subtraction of series and other, element-wise (binary operator `rsub`).
|
| Equivalent to ``other - series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.sub
|
| rtruediv(self, other, level=None, fill_value=None, axis=0)
| Floating division of series and other, element-wise (binary operator `rtruediv`).
|
| Equivalent to ``other / series``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.truediv
|
| searchsorted(self, v, side='left', sorter=None)
| Find indices where elements should be inserted to maintain order.
|
| Find the indices into a sorted Series `self` such that, if the
| corresponding elements in `v` were inserted before the indices, the
| order of `self` would be preserved.
|
| Parameters
| ----------
| v : array_like
| Values to insert into `self`.
| side : {'left', 'right'}, optional
| If 'left', the index of the first suitable location found is given.
| If 'right', return the last such index. If there is no suitable
| index, return either 0 or N (where N is the length of `self`).
| sorter : 1-D array_like, optional
| Optional array of integer indices that sort `self` into ascending
| order. They are typically the result of ``np.argsort``.
|
| Returns
| -------
| indices : array of ints
| Array of insertion points with the same shape as `v`.
|
| See Also
| --------
| numpy.searchsorted
|
| Notes
| -----
| Binary search is used to find the required insertion points.
|
| Examples
| --------
| >>> x = pd.Series([1, 2, 3])
| >>> x
| 0 1
| 1 2
| 2 3
| dtype: int64
| >>> x.searchsorted(4)
| array([3])
| >>> x.searchsorted([0, 4])
| array([0, 3])
| >>> x.searchsorted([1, 3], side='left')
| array([0, 2])
| >>> x.searchsorted([1, 3], side='right')
| array([1, 3])
| >>>
| >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
| [apple, bread, bread, cheese, milk]
| Categories (4, object): [apple < bread < cheese < milk]
| >>> x.searchsorted('bread')
| array([1]) # Note: an array, not a scalar
| >>> x.searchsorted(['bread'])
| array([1])
| >>> x.searchsorted(['bread', 'eggs'])
| array([1, 4])
| >>> x.searchsorted(['bread', 'eggs'], side='right')
| array([3, 4]) # eggs before milk
|
| sem(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)
| Return unbiased standard error of the mean over requested axis.
|
| Normalized by N-1 by default. This can be changed using the ddof argument
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| ddof : int, default 1
| degrees of freedom
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| sem : scalar or Series (if level specified)
|
| set_value(self, label, value, takeable=False)
| Quickly set single value at passed label. If label is not contained, a
| new object is created with the label placed at the end of the result
| index
|
| Parameters
| ----------
| label : object
| Partial indexing with MultiIndex not allowed
| value : object
| Scalar value
| takeable : interpret the index as indexers, default False
|
| Returns
| -------
| series : Series
| If label is contained, will be reference to calling Series,
| otherwise a new object
|
| shift(self, periods=1, freq=None, axis=0)
| Shift index by desired number of periods with an optional time freq
|
| Parameters
| ----------
| periods : int
| Number of periods to move, can be positive or negative
| freq : DateOffset, timedelta, or time rule string, optional
| Increment to use from the tseries module or time rule (e.g. 'EOM').
| See Notes.
| axis : {0, 'index'}
|
| Notes
| -----
| If freq is specified then the index values are shifted but the data
| is not realigned. That is, use freq if you would like to extend the
| index when shifting and preserve the original data.
|
| Returns
| -------
| shifted : Series
|
| skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return unbiased skew over requested axis
| Normalized by N-1
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| skew : scalar or Series (if level specified)
|
| sort(self, axis=0, ascending=True, kind='quicksort', na_position='last', inplace=True)
| DEPRECATED: use :meth:`Series.sort_values(inplace=True)` for INPLACE
| sorting
|
| Sort values and index labels by value. This is an inplace sort by
| default. Series.order is the equivalent but returns a new Series.
|
| Parameters
| ----------
| axis : int (can only be zero)
| ascending : boolean, default True
| Sort ascending. Passing False sorts descending
| kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
| Choice of sorting algorithm. See np.sort for more
| information. 'mergesort' is the only stable algorithm
| na_position : {'first', 'last'} (optional, default='last')
| 'first' puts NaNs at the beginning
| 'last' puts NaNs at the end
| inplace : boolean, default True
| Do operation in place.
|
| See Also
| --------
| Series.sort_values
|
| sort_index(self, axis=0, level=None, ascending=True, inplace=False, sort_remaining=True)
| Sort object by labels (along an axis)
|
| Parameters
| ----------
| axis : index to direct sorting
| level : int or level name or list of ints or list of level names
| if not None, sort on values in specified index level(s)
| ascending : boolean, default True
| Sort ascending vs. descending
| inplace : bool, default False
| if True, perform operation in-place
| kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
| Choice of sorting algorithm. See also ndarray.np.sort for more
| information. `mergesort` is the only stable algorithm. For
| DataFrames, this option is only applied when sorting on a single
| column or label.
| na_position : {'first', 'last'}, default 'last'
| `first` puts NaNs at the beginning, `last` puts NaNs at the end
| sort_remaining : bool, default True
| if true and sorting by level and index is multilevel, sort by other
| levels too (in order) after sorting by specified level
|
| Returns
| -------
| sorted_obj : Series
|
| sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last')
| Sort by the values along either axis
|
| .. versionadded:: 0.17.0
|
| Parameters
| ----------
| axis : {0, 'index'}, default 0
| Axis to direct sorting
| ascending : bool or list of bool, default True
| Sort ascending vs. descending. Specify list for multiple sort
| orders. If this is a list of bools, must match the length of
| the by.
| inplace : bool, default False
| if True, perform operation in-place
| kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
| Choice of sorting algorithm. See also ndarray.np.sort for more
| information. `mergesort` is the only stable algorithm. For
| DataFrames, this option is only applied when sorting on a single
| column or label.
| na_position : {'first', 'last'}, default 'last'
| `first` puts NaNs at the beginning, `last` puts NaNs at the end
|
| Returns
| -------
| sorted_obj : Series
|
| sortlevel(self, level=0, ascending=True, sort_remaining=True)
| Sort Series with MultiIndex by chosen level. Data will be
| lexicographically sorted by the chosen level followed by the other
| levels (in order)
|
| Parameters
| ----------
| level : int or level name, default None
| ascending : bool, default True
|
| Returns
| -------
| sorted : Series
|
| See Also
| --------
| Series.sort_index(level=...)
|
| std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)
| Return sample standard deviation over requested axis.
|
| Normalized by N-1 by default. This can be changed using the ddof argument
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| ddof : int, default 1
| degrees of freedom
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| std : scalar or Series (if level specified)
|
| sub(self, other, level=None, fill_value=None, axis=0)
| Subtraction of series and other, element-wise (binary operator `sub`).
|
| Equivalent to ``series - other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.rsub
|
| subtract = sub(self, other, level=None, fill_value=None, axis=0)
|
| sum(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs)
| Return the sum of the values for the requested axis
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| sum : scalar or Series (if level specified)
|
| swaplevel(self, i=-2, j=-1, copy=True)
| Swap levels i and j in a MultiIndex
|
| Parameters
| ----------
| i, j : int, string (can be mixed)
| Level of index to be swapped. Can pass level name as string.
|
| Returns
| -------
| swapped : Series
|
| .. versionchanged:: 0.18.1
|
| The indexes ``i`` and ``j`` are now optional, and default to
| the two innermost levels of the index.
|
| take(self, indices, axis=0, convert=True, is_copy=False, **kwargs)
| return Series corresponding to requested indices
|
| Parameters
| ----------
| indices : list / array of ints
| convert : translate negative to positive indices (default)
|
| Returns
| -------
| taken : Series
|
| See also
| --------
| numpy.ndarray.take
|
| to_csv(self, path=None, index=True, sep=',', na_rep='', float_format=None, header=False, index_label=None, mode='w', encoding=None, date_format=None, decimal='.')
| Write Series to a comma-separated values (csv) file
|
| Parameters
| ----------
| path : string or file handle, default None
| File path or object, if None is provided the result is returned as
| a string.
| na_rep : string, default ''
| Missing data representation
| float_format : string, default None
| Format string for floating point numbers
| header : boolean, default False
| Write out series name
| index : boolean, default True
| Write row names (index)
| index_label : string or sequence, default None
| Column label for index column(s) if desired. If None is given, and
| `header` and `index` are True, then the index names are used. A
| sequence should be given if the DataFrame uses MultiIndex.
| mode : Python write mode, default 'w'
| sep : character, default ","
| Field delimiter for the output file.
| encoding : string, optional
| a string representing the encoding to use if the contents are
| non-ascii, for python versions prior to 3
| date_format: string, default None
| Format string for datetime objects.
| decimal: string, default '.'
| Character recognized as decimal separator. E.g. use ',' for
| European data
|
| to_dict(self)
| Convert Series to {label -> value} dict
|
| Returns
| -------
| value_dict : dict
|
| to_frame(self, name=None)
| Convert Series to DataFrame
|
| Parameters
| ----------
| name : object, default None
| The passed name should substitute for the series name (if it has
| one).
|
| Returns
| -------
| data_frame : DataFrame
|
| to_period(self, freq=None, copy=True)
| Convert Series from DatetimeIndex to PeriodIndex with desired
| frequency (inferred from index if not passed)
|
| Parameters
| ----------
| freq : string, default
|
| Returns
| -------
| ts : Series with PeriodIndex
|
| to_sparse(self, kind='block', fill_value=None)
| Convert Series to SparseSeries
|
| Parameters
| ----------
| kind : {'block', 'integer'}
| fill_value : float, defaults to NaN (missing)
|
| Returns
| -------
| sp : SparseSeries
|
| to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, max_rows=None)
| Render a string representation of the Series
|
| Parameters
| ----------
| buf : StringIO-like, optional
| buffer to write to
| na_rep : string, optional
| string representation of NAN to use, default 'NaN'
| float_format : one-parameter function, optional
| formatter function to apply to columns' elements if they are floats
| default None
| header: boolean, default True
| Add the Series header (index name)
| index : bool, optional
| Add index (row) labels, default True
| length : boolean, default False
| Add the Series length
| dtype : boolean, default False
| Add the Series dtype
| name : boolean, default False
| Add the Series name if not None
| max_rows : int, optional
| Maximum number of rows to show before truncating. If None, show
| all.
|
| Returns
| -------
| formatted : string (if not buffer passed)
|
| to_timestamp(self, freq=None, how='start', copy=True)
| Cast to datetimeindex of timestamps, at *beginning* of period
|
| Parameters
| ----------
| freq : string, default frequency of PeriodIndex
| Desired frequency
| how : {'s', 'e', 'start', 'end'}
| Convention for converting period to timestamp; start of period
| vs. end
|
| Returns
| -------
| ts : Series with DatetimeIndex
|
| tolist(self)
| Convert Series to a nested list
|
| truediv(self, other, level=None, fill_value=None, axis=0)
| Floating division of series and other, element-wise (binary operator `truediv`).
|
| Equivalent to ``series / other``, but with support to substitute a fill_value for
| missing data in one of the inputs.
|
| Parameters
| ----------
| other: Series or scalar value
| fill_value : None or float value, default None (NaN)
| Fill missing (NaN) values with this value. If both Series are
| missing, the result will be missing
| level : int or name
| Broadcast across a level, matching Index values on the
| passed MultiIndex level
|
| Returns
| -------
| result : Series
|
| See also
| --------
| Series.rtruediv
|
| unique(self)
| Return np.ndarray of unique values in the object.
| Significantly faster than numpy.unique. Includes NA values.
| The order of the original is preserved.
|
| Returns
| -------
| uniques : np.ndarray
|
| unstack(self, level=-1, fill_value=None)
| Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
| The level involved will automatically get sorted.
|
| Parameters
| ----------
| level : int, string, or list of these, default last level
| Level(s) to unstack, can pass level name
| fill_value : replace NaN with this value if the unstack produces
| missing values
|
| .. versionadded: 0.18.0
|
| Examples
| --------
| >>> s
| one a 1.
| one b 2.
| two a 3.
| two b 4.
|
| >>> s.unstack(level=-1)
| a b
| one 1. 2.
| two 3. 4.
|
| >>> s.unstack(level=0)
| one two
| a 1. 2.
| b 3. 4.
|
| Returns
| -------
| unstacked : DataFrame
|
| update(self, other)
| Modify Series in place using non-NA values from passed
| Series. Aligns on index
|
| Parameters
| ----------
| other : Series
|
| valid lambda self, inplace=False, **kwargs
|
| var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs)
| Return unbiased variance over requested axis.
|
| Normalized by N-1 by default. This can be changed using the ddof argument
|
| Parameters
| ----------
| axis : {index (0)}
| skipna : boolean, default True
| Exclude NA/null values. If an entire row/column is NA, the result
| will be NA
| level : int or level name, default None
| If the axis is a MultiIndex (hierarchical), count along a
| particular level, collapsing into a scalar
| ddof : int, default 1
| degrees of freedom
| numeric_only : boolean, default None
| Include only float, int, boolean columns. If None, will attempt to use
| everything, then use only numeric data. Not implemented for Series.
|
| Returns
| -------
| var : scalar or Series (if level specified)
|
| view(self, dtype=None)
|
| ----------------------------------------------------------------------
| Class methods defined here:
|
| from_array(arr, index=None, name=None, dtype=None, copy=False, fastpath=False) from builtins.type
|
| from_csv(path, sep=',', parse_dates=True, header=None, index_col=0, encoding=None, infer_datetime_format=False) from builtins.type
| Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv`
| instead).
|
| It is preferable to use the more powerful :func:`pandas.read_csv`
| for most general purposes, but ``from_csv`` makes for an easy
| roundtrip to and from a file (the exact counterpart of
| ``to_csv``), especially with a time Series.
|
| This method only differs from :func:`pandas.read_csv` in some defaults:
|
| - `index_col` is ``0`` instead of ``None`` (take first column as index
| by default)
| - `header` is ``None`` instead of ``0`` (the first row is not used as
| the column names)
| - `parse_dates` is ``True`` instead of ``False`` (try parsing the index
| as datetime by default)
|
| With :func:`pandas.read_csv`, the option ``squeeze=True`` can be used
| to return a Series like ``from_csv``.
|
| Parameters
| ----------
| path : string file path or file handle / StringIO
| sep : string, default ','
| Field delimiter
| parse_dates : boolean, default True
| Parse dates. Different default from read_table
| header : int, default None
| Row to use as header (skip prior rows)
| index_col : int or sequence, default 0
| Column to use for index. If a sequence is given, a MultiIndex
| is used. Different default from read_table
| encoding : string, optional
| a string representing the encoding to use if the contents are
| non-ascii, for python versions prior to 3
| infer_datetime_format: boolean, default False
| If True and `parse_dates` is True for a column, try to infer the
| datetime format based on the first datetime string. If the format
| can be inferred, there often will be a large parsing speed-up.
|
| See also
| --------
| pandas.read_csv
|
| Returns
| -------
| y : Series
|
| ----------------------------------------------------------------------
| Data descriptors defined here:
|
| asobject
| return object Series which contains boxed values
|
| *this is an internal non-public method*
|
| axes
| Return a list of the row axis labels
|
| dtype
| return the dtype object of the underlying data
|
| dtypes
| return the dtype object of the underlying data
|
| ftype
| return if the data is sparse|dense
|
| ftypes
| return if the data is sparse|dense
|
| imag
|
| index
|
| is_time_series
|
| name
|
| real
|
| values
| Return Series as ndarray or ndarray-like
| depending on the dtype
|
| Returns
| -------
| arr : numpy.ndarray or ndarray-like
|
| Examples
| --------
| >>> pd.Series([1, 2, 3]).values
| array([1, 2, 3])
|
| >>> pd.Series(list('aabc')).values
| array(['a', 'a', 'b', 'c'], dtype=object)
|
| >>> pd.Series(list('aabc')).astype('category').values
| [a, a, b, c]
| Categories (3, object): [a, b, c]
|
| Timezone aware datetime data is converted to UTC:
|
| >>> pd.Series(pd.date_range('20130101', periods=3,
| tz='US/Eastern')).values
| array(['2013-01-01T00:00:00.000000000-0500',
| '2013-01-02T00:00:00.000000000-0500',
| '2013-01-03T00:00:00.000000000-0500'], dtype='datetime64[ns]')
|
| ----------------------------------------------------------------------
| Data and other attributes defined here:
|
| cat = <class 'pandas.core.categorical.CategoricalAccessor'>
| Accessor object for categorical properties of the Series values.
|
 |      Be aware that assigning to `categories` is an inplace operation, while all
| methods return new categorical data per default (but can be called with
| `inplace=True`).
|
| Examples
| --------
| >>> s.cat.categories
| >>> s.cat.categories = list('abc')
| >>> s.cat.rename_categories(list('cab'))
| >>> s.cat.reorder_categories(list('cab'))
| >>> s.cat.add_categories(['d','e'])
| >>> s.cat.remove_categories(['d'])
| >>> s.cat.remove_unused_categories()
| >>> s.cat.set_categories(list('abcde'))
| >>> s.cat.as_ordered()
| >>> s.cat.as_unordered()
|
| dt = <class 'pandas.tseries.common.CombinedDatetimelikeProperties'>
| Accessor object for datetimelike properties of the Series values.
|
| Examples
| --------
| >>> s.dt.hour
| >>> s.dt.second
| >>> s.dt.quarter
|
| Returns a Series indexed like the original Series.
| Raises TypeError if the Series does not contain datetimelike values.
|
| plot = <class 'pandas.tools.plotting.SeriesPlotMethods'>
| Series plotting accessor and method
|
| Examples
| --------
| >>> s.plot.line()
| >>> s.plot.bar()
| >>> s.plot.hist()
|
| Plotting methods can also be accessed by calling the accessor as a method
| with the ``kind`` argument:
| ``s.plot(kind='line')`` is equivalent to ``s.plot.line()``
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.base.IndexOpsMixin:
|
| factorize(self, sort=False, na_sentinel=-1)
| Encode the object as an enumerated type or categorical variable
|
| Parameters
| ----------
| sort : boolean, default False
| Sort by values
| na_sentinel: int, default -1
| Value to mark "not found"
|
| Returns
| -------
| labels : the indexer to the original array
| uniques : the unique Index
|
| item(self)
| return the first element of the underlying data as a python
| scalar
|
| nunique(self, dropna=True)
| Return number of unique elements in the object.
|
| Excludes NA values by default.
|
| Parameters
| ----------
| dropna : boolean, default True
| Don't include NaN in the count.
|
| Returns
| -------
| nunique : int
|
| transpose(self, *args, **kwargs)
| return the transpose, which is by definition self
|
| value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True)
| Returns object containing counts of unique values.
|
| The resulting object will be in descending order so that the
| first element is the most frequently-occurring element.
| Excludes NA values by default.
|
| Parameters
| ----------
| normalize : boolean, default False
| If True then the object returned will contain the relative
| frequencies of the unique values.
| sort : boolean, default True
| Sort by values
| ascending : boolean, default False
| Sort in ascending order
| bins : integer, optional
| Rather than count values, group them into half-open bins,
| a convenience for pd.cut, only works with numeric data
| dropna : boolean, default True
| Don't include counts of NaN.
|
| Returns
| -------
| counts : Series
|
| ----------------------------------------------------------------------
| Data descriptors inherited from pandas.core.base.IndexOpsMixin:
|
| T
| return the transpose, which is by definition self
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
|
| base
| return the base object if the memory of the underlying data is
| shared
|
| data
| return the data pointer of the underlying data
|
| flags
| return the ndarray.flags for the underlying data
|
| hasnans
|
| is_monotonic
| Return boolean if values in the object are
| monotonic_increasing
|
| .. versionadded:: 0.19.0
|
| Returns
| -------
| is_monotonic : boolean
|
| is_monotonic_decreasing
| Return boolean if values in the object are
| monotonic_decreasing
|
| .. versionadded:: 0.19.0
|
| Returns
| -------
| is_monotonic_decreasing : boolean
|
| is_monotonic_increasing
| Return boolean if values in the object are
| monotonic_increasing
|
| .. versionadded:: 0.19.0
|
| Returns
| -------
| is_monotonic : boolean
|
| is_unique
| Return boolean if values in the object are unique
|
| Returns
| -------
| is_unique : boolean
|
| itemsize
| return the size of the dtype of the item of the underlying data
|
| nbytes
| return the number of bytes in the underlying data
|
| ndim
| return the number of dimensions of the underlying data,
| by definition 1
|
| shape
| return a tuple of the shape of the underlying data
|
| size
| return the number of elements in the underlying data
|
| strides
| return the strides of the underlying data
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from pandas.core.base.IndexOpsMixin:
|
| __array_priority__ = 1000
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from pandas.core.strings.StringAccessorMixin:
|
| str = <class 'pandas.core.strings.StringMethods'>
| Vectorized string functions for Series and Index. NAs stay NA unless
| handled otherwise by a particular method. Patterned after Python's string
| methods, with some inspiration from R's stringr package.
|
| Examples
| --------
| >>> s.str.split('_')
| >>> s.str.replace('_', '')
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.generic.NDFrame:
|
| __abs__(self)
|
| __bool__ = __nonzero__(self)
|
| __contains__(self, key)
| True if the key is in the info axis
|
| __delitem__(self, key)
| Delete item
|
| __finalize__(self, other, method=None, **kwargs)
| Propagate metadata from other to self.
|
| Parameters
| ----------
| other : the object from which to get the attributes that we are going
| to propagate
| method : optional, a passed method name ; possibly to take different
| types of propagation actions based on this
|
| __getattr__(self, name)
| After regular attribute access, try looking up the name
| This allows simpler access to columns for interactive use.
|
| __getstate__(self)
|
| __hash__(self)
| Return hash(self).
|
| __invert__(self)
|
| __neg__(self)
|
| __nonzero__(self)
|
| __round__(self, decimals=0)
|
| __setattr__(self, name, value)
| After regular attribute access, try setting the name
| This allows simpler access to columns for interactive use.
|
| __setstate__(self, state)
|
| abs(self)
| Return an object with absolute value taken--only applicable to objects
| that are all numeric.
|
| Returns
| -------
| abs: type of caller
|
| add_prefix(self, prefix)
| Concatenate prefix string with panel items names.
|
| Parameters
| ----------
| prefix : string
|
| Returns
| -------
| with_prefix : type of caller
|
| add_suffix(self, suffix)
| Concatenate suffix string with panel items names.
|
| Parameters
| ----------
| suffix : string
|
| Returns
| -------
| with_suffix : type of caller
|
| as_blocks(self, copy=True)
| Convert the frame to a dict of dtype -> Constructor Types that each has
| a homogeneous dtype.
|
| NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
| as_matrix)
|
| Parameters
| ----------
| copy : boolean, default True
|
| .. versionadded: 0.16.1
|
| Returns
| -------
| values : a dict of dtype -> Constructor Types
|
| as_matrix(self, columns=None)
| Convert the frame to its Numpy-array representation.
|
| Parameters
| ----------
| columns: list, optional, default:None
| If None, return all columns, otherwise, returns specified columns.
|
| Returns
| -------
| values : ndarray
| If the caller is heterogeneous and contains booleans or objects,
| the result will be of dtype=object. See Notes.
|
|
| Notes
| -----
| Return is NOT a Numpy-matrix, rather, a Numpy-array.
|
| The dtype will be a lower-common-denominator dtype (implicit
| upcasting); that is to say if the dtypes (even of numeric types)
| are mixed, the one that accommodates all will be chosen. Use this
| with care if you are not dealing with the blocks.
|
| e.g. If the dtypes are float16 and float32, dtype will be upcast to
 |          float32. If dtypes are int32 and uint8, dtype will be upcast to
| int32. By numpy.find_common_type convention, mixing int64 and uint64
 |          will result in a float64 dtype.
|
| This method is provided for backwards compatibility. Generally,
| it is recommended to use '.values'.
|
| See Also
| --------
| pandas.DataFrame.values
|
| asfreq(self, freq, method=None, how=None, normalize=False)
| Convert TimeSeries to specified frequency.
|
| Optionally provide filling method to pad/backfill missing values.
|
| Parameters
| ----------
| freq : DateOffset object, or string
| method : {'backfill'/'bfill', 'pad'/'ffill'}, default None
| Method to use for filling holes in reindexed Series (note this
| does not fill NaNs that already were present):
|
| * 'pad' / 'ffill': propagate last valid observation forward to next
| valid
| * 'backfill' / 'bfill': use NEXT valid observation to fill
| how : {'start', 'end'}, default end
| For PeriodIndex only, see PeriodIndex.asfreq
| normalize : bool, default False
| Whether to reset output index to midnight
|
| Returns
| -------
| converted : type of caller
|
| Notes
| -----
| To learn more about the frequency strings, please see `this link
| <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
|
| asof(self, where, subset=None)
| The last row without any NaN is taken (or the last row without
| NaN considering only the subset of columns in the case of a DataFrame)
|
| .. versionadded:: 0.19.0 For DataFrame
|
| If there is no good value, NaN is returned.
|
| Parameters
| ----------
| where : date or array of dates
| subset : string or list of strings, default None
| if not None use these columns for NaN propagation
|
| Notes
| -----
| Dates are assumed to be sorted
| Raises if this is not the case
|
| Returns
| -------
| where is scalar
|
| - value or NaN if input is Series
| - Series if input is DataFrame
|
| where is Index: same shape object as input
|
| See Also
| --------
| merge_asof
|
| astype(self, dtype, copy=True, raise_on_error=True, **kwargs)
| Cast object to input numpy.dtype
| Return a copy when copy = True (be really careful with this!)
|
| Parameters
| ----------
| dtype : data type, or dict of column name -> data type
| Use a numpy.dtype or Python type to cast entire pandas object to
| the same type. Alternatively, use {col: dtype, ...}, where col is a
| column label and dtype is a numpy.dtype or Python type to cast one
| or more of the DataFrame's columns to column-specific types.
| raise_on_error : raise on invalid input
| kwargs : keyword arguments to pass on to the constructor
|
| Returns
| -------
| casted : type of caller
|
| at_time(self, time, asof=False)
| Select values at particular time of day (e.g. 9:30AM).
|
| Parameters
| ----------
| time : datetime.time or string
|
| Returns
| -------
| values_at_time : type of caller
|
| between_time(self, start_time, end_time, include_start=True, include_end=True)
| Select values between particular times of the day (e.g., 9:00-9:30 AM).
|
| Parameters
| ----------
| start_time : datetime.time or string
| end_time : datetime.time or string
| include_start : boolean, default True
| include_end : boolean, default True
|
| Returns
| -------
| values_between_time : type of caller
|
| bfill(self, axis=None, inplace=False, limit=None, downcast=None)
| Synonym for NDFrame.fillna(method='bfill')
|
| bool(self)
| Return the bool of a single element PandasObject.
|
| This must be a boolean scalar value, either True or False. Raise a
| ValueError if the PandasObject does not have exactly 1 element, or that
| element is not boolean
|
| clip(self, lower=None, upper=None, axis=None, *args, **kwargs)
| Trim values at input threshold(s).
|
| Parameters
| ----------
| lower : float or array_like, default None
| upper : float or array_like, default None
| axis : int or string axis name, optional
| Align object with lower and upper along the given axis.
|
| Returns
| -------
| clipped : Series
|
| Examples
| --------
| >>> df
| 0 1
| 0 0.335232 -1.256177
| 1 -1.367855 0.746646
| 2 0.027753 -1.176076
| 3 0.230930 -0.679613
| 4 1.261967 0.570967
| >>> df.clip(-1.0, 0.5)
| 0 1
| 0 0.335232 -1.000000
| 1 -1.000000 0.500000
| 2 0.027753 -1.000000
| 3 0.230930 -0.679613
| 4 0.500000 0.500000
| >>> t
| 0 -0.3
| 1 -0.2
| 2 -0.1
| 3 0.0
| 4 0.1
| dtype: float64
| >>> df.clip(t, t + 1, axis=0)
| 0 1
| 0 0.335232 -0.300000
| 1 -0.200000 0.746646
| 2 0.027753 -0.100000
| 3 0.230930 0.000000
| 4 1.100000 0.570967
|
| clip_lower(self, threshold, axis=None)
| Return copy of the input with values below given value(s) truncated.
|
| Parameters
| ----------
| threshold : float or array_like
| axis : int or string axis name, optional
| Align object with threshold along the given axis.
|
| See Also
| --------
| clip
|
| Returns
| -------
| clipped : same type as input
|
| clip_upper(self, threshold, axis=None)
| Return copy of input with values above given value(s) truncated.
|
| Parameters
| ----------
| threshold : float or array_like
| axis : int or string axis name, optional
| Align object with threshold along the given axis.
|
| See Also
| --------
| clip
|
| Returns
| -------
| clipped : same type as input
|
| consolidate(self, inplace=False)
| Compute NDFrame with "consolidated" internals (data of each dtype
| grouped together in a single ndarray). Mainly an internal API function,
| but available here to the savvy user
|
| Parameters
| ----------
| inplace : boolean, default False
| If False return new object, otherwise modify existing object
|
| Returns
| -------
| consolidated : type of caller
|
| convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=True, copy=True)
| Deprecated.
|
| Attempt to infer better dtype for object columns
|
| Parameters
| ----------
| convert_dates : boolean, default True
| If True, convert to date where possible. If 'coerce', force
| conversion, with unconvertible values becoming NaT.
| convert_numeric : boolean, default False
| If True, attempt to coerce to numbers (including strings), with
| unconvertible values becoming NaN.
| convert_timedeltas : boolean, default True
| If True, convert to timedelta where possible. If 'coerce', force
| conversion, with unconvertible values becoming NaT.
| copy : boolean, default True
| If True, return a copy even if no copy is necessary (e.g. no
| conversion was done). Note: This is meant for internal use, and
| should not be confused with inplace.
|
| See Also
| --------
| pandas.to_datetime : Convert argument to datetime.
| pandas.to_timedelta : Convert argument to timedelta.
 |      pandas.to_numeric : Convert argument to a numeric type.
|
| Returns
| -------
| converted : same as input object
|
| copy(self, deep=True)
| Make a copy of this objects data.
|
| Parameters
| ----------
| deep : boolean or string, default True
| Make a deep copy, including a copy of the data and the indices.
| With ``deep=False`` neither the indices or the data are copied.
|
| Note that when ``deep=True`` data is copied, actual python objects
| will not be copied recursively, only the reference to the object.
| This is in contrast to ``copy.deepcopy`` in the Standard Library,
| which recursively copies object data.
|
| Returns
| -------
| copy : type of caller
|
| describe(self, percentiles=None, include=None, exclude=None)
| Generate various summary statistics, excluding NaN values.
|
| Parameters
| ----------
| percentiles : array-like, optional
| The percentiles to include in the output. Should all
| be in the interval [0, 1]. By default `percentiles` is
| [.25, .5, .75], returning the 25th, 50th, and 75th percentiles.
| include, exclude : list-like, 'all', or None (default)
| Specify the form of the returned result. Either:
|
| - None to both (default). The result will include only
| numeric-typed columns or, if none are, only categorical columns.
| - A list of dtypes or strings to be included/excluded.
 |            To select all numeric types use numpy.number. To select
| categorical objects use type object. See also the select_dtypes
| documentation. eg. df.describe(include=['O'])
| - If include is the string 'all', the output column-set will
| match the input one.
|
| Returns
| -------
| summary: NDFrame of summary statistics
|
| Notes
| -----
| The output DataFrame index depends on the requested dtypes:
|
| For numeric dtypes, it will include: count, mean, std, min,
| max, and lower, 50, and upper percentiles.
|
| For object dtypes (e.g. timestamps or strings), the index
| will include the count, unique, most common, and frequency of the
| most common. Timestamps also include the first and last items.
|
| For mixed dtypes, the index will be the union of the corresponding
| output types. Non-applicable entries will be filled with NaN.
| Note that mixed-dtype outputs can only be returned from mixed-dtype
| inputs and appropriate use of the include/exclude arguments.
|
| If multiple values have the highest count, then the
| `count` and `most common` pair will be arbitrarily chosen from
| among those with the highest count.
|
| The include, exclude arguments are ignored for Series.
|
| See Also
| --------
| DataFrame.select_dtypes
|
| drop(self, labels, axis=0, level=None, inplace=False, errors='raise')
| Return new object with labels in requested axis removed.
|
| Parameters
| ----------
| labels : single label or list-like
| axis : int or axis name
| level : int or level name, default None
| For MultiIndex
| inplace : bool, default False
| If True, do operation inplace and return None.
| errors : {'ignore', 'raise'}, default 'raise'
| If 'ignore', suppress error and existing labels are dropped.
|
| .. versionadded:: 0.16.1
|
| Returns
| -------
| dropped : type of caller
|
| equals(self, other)
| Determines if two NDFrame objects contain the same elements. NaNs in
| the same location are considered equal.
|
| ffill(self, axis=None, inplace=False, limit=None, downcast=None)
| Synonym for NDFrame.fillna(method='ffill')
|
| filter(self, items=None, like=None, regex=None, axis=None)
| Subset rows or columns of dataframe according to labels in
| the specified index.
|
| Note that this routine does not filter a dataframe on its
| contents. The filter is applied to the labels of the index.
|
| Parameters
| ----------
| items : list-like
| List of info axis to restrict to (must not all be present)
| like : string
| Keep info axis where "arg in col == True"
| regex : string (regular expression)
| Keep info axis with re.search(regex, col) == True
| axis : int or string axis name
| The axis to filter on. By default this is the info axis,
| 'index' for Series, 'columns' for DataFrame
|
| Returns
| -------
| same type as input object
|
| Examples
| --------
| >>> df
| one two three
| mouse 1 2 3
| rabbit 4 5 6
|
| >>> # select columns by name
| >>> df.filter(items=['one', 'three'])
| one three
| mouse 1 3
| rabbit 4 6
|
| >>> # select columns by regular expression
| >>> df.filter(regex='e$', axis=1)
| one three
| mouse 1 3
| rabbit 4 6
|
| >>> # select rows containing 'bbi'
| >>> df.filter(like='bbi', axis=0)
| one two three
| rabbit 4 5 6
|
| See Also
| --------
| pandas.DataFrame.select
|
| Notes
| -----
| The ``items``, ``like``, and ``regex`` parameters are
| enforced to be mutually exclusive.
|
| ``axis`` defaults to the info axis that is used when indexing
| with ``[]``.
|
| first(self, offset)
| Convenience method for subsetting initial periods of time series data
| based on a date offset.
|
| Parameters
| ----------
| offset : string, DateOffset, dateutil.relativedelta
|
| Examples
| --------
| ts.first('10D') -> First 10 days
|
| Returns
| -------
| subset : type of caller
|
| get(self, key, default=None)
| Get item from object for given key (DataFrame column, Panel slice,
| etc.). Returns default value if not found.
|
| Parameters
| ----------
| key : object
|
| Returns
| -------
| value : type of items contained in object
|
| get_dtype_counts(self)
| Return the counts of dtypes in this object.
|
| get_ftype_counts(self)
| Return the counts of ftypes in this object.
|
| groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, **kwargs)
| Group series using mapper (dict or key function, apply given function
| to group, return result as series) or by a series of columns.
|
| Parameters
| ----------
| by : mapping function / list of functions, dict, Series, or tuple /
| list of column names.
| Called on each element of the object index to determine the groups.
| If a dict or Series is passed, the Series or dict VALUES will be
| used to determine the groups
| axis : int, default 0
| level : int, level name, or sequence of such, default None
| If the axis is a MultiIndex (hierarchical), group by a particular
| level or levels
| as_index : boolean, default True
| For aggregated output, return object with group labels as the
| index. Only relevant for DataFrame input. as_index=False is
| effectively "SQL-style" grouped output
| sort : boolean, default True
| Sort group keys. Get better performance by turning this off.
| Note this does not influence the order of observations within each
| group. groupby preserves the order of rows within each group.
| group_keys : boolean, default True
| When calling apply, add group keys to index to identify pieces
| squeeze : boolean, default False
| reduce the dimensionality of the return type if possible,
| otherwise return a consistent type
|
| Examples
| --------
| DataFrame results
|
| >>> data.groupby(func, axis=0).mean()
| >>> data.groupby(['col1', 'col2'])['col3'].mean()
|
| DataFrame with hierarchical index
|
| >>> data.groupby(['col1', 'col2']).mean()
|
| Returns
| -------
| GroupBy object
|
| head(self, n=5)
| Returns first n rows
|
| interpolate(self, method='linear', axis=0, limit=None, inplace=False, limit_direction='forward', downcast=None, **kwargs)
| Interpolate values according to different methods.
|
| Please note that only ``method='linear'`` is supported for
| DataFrames/Series with a MultiIndex.
|
| Parameters
| ----------
| method : {'linear', 'time', 'index', 'values', 'nearest', 'zero',
| 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
| 'polynomial', 'spline', 'piecewise_polynomial',
| 'from_derivatives', 'pchip', 'akima'}
|
| * 'linear': ignore the index and treat the values as equally
| spaced. This is the only method supported on MultiIndexes.
| default
| * 'time': interpolation works on daily and higher resolution
| data to interpolate given length of interval
| * 'index', 'values': use the actual numerical values of the index
| * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
| 'barycentric', 'polynomial' is passed to
| ``scipy.interpolate.interp1d``. Both 'polynomial' and 'spline'
| require that you also specify an `order` (int),
| e.g. df.interpolate(method='polynomial', order=4).
| These use the actual numerical values of the index.
| * 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' are all
| wrappers around the scipy interpolation methods of similar
| names. These use the actual numerical values of the index. See
| the scipy documentation for more on their behavior
| `here <http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__ # noqa
| `and here <http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html>`__ # noqa
| * 'from_derivatives' refers to BPoly.from_derivatives which
| replaces 'piecewise_polynomial' interpolation method in scipy 0.18
|
| .. versionadded:: 0.18.1
|
| Added support for the 'akima' method
| Added interpolate method 'from_derivatives' which replaces
| 'piecewise_polynomial' in scipy 0.18; backwards-compatible with
| scipy < 0.18
|
| axis : {0, 1}, default 0
| * 0: fill column-by-column
| * 1: fill row-by-row
| limit : int, default None.
| Maximum number of consecutive NaNs to fill.
| limit_direction : {'forward', 'backward', 'both'}, defaults to 'forward'
| If limit is specified, consecutive NaNs will be filled in this
| direction.
|
| .. versionadded:: 0.17.0
|
| inplace : bool, default False
| Update the NDFrame in place if possible.
| downcast : optional, 'infer' or None, defaults to None
| Downcast dtypes if possible.
| kwargs : keyword arguments to pass on to the interpolating function.
|
| Returns
| -------
| Series or DataFrame of same shape interpolated at the NaNs
|
| See Also
| --------
| reindex, replace, fillna
|
| Examples
| --------
|
| Filling in NaNs
|
| >>> s = pd.Series([0, 1, np.nan, 3])
| >>> s.interpolate()
| 0 0
| 1 1
| 2 2
| 3 3
| dtype: float64
|
| isnull(self)
| Return a boolean same-sized object indicating if the values are null.
|
| See Also
| --------
| notnull : boolean inverse of isnull
|
| iterkv(self, *args, **kwargs)
| iteritems alias used to get around 2to3. Deprecated
|
| last(self, offset)
| Convenience method for subsetting final periods of time series data
| based on a date offset.
|
| Parameters
| ----------
| offset : string, DateOffset, dateutil.relativedelta
|
| Examples
| --------
| ts.last('5M') -> Last 5 months
|
| Returns
| -------
| subset : type of caller
|
| mask(self, cond, other=nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True)
| Return an object of same shape as self and whose corresponding
| entries are from self where cond is False and otherwise are from
| other.
|
| Parameters
| ----------
| cond : boolean NDFrame, array or callable
| If cond is callable, it is computed on the NDFrame and
| should return boolean NDFrame or array.
| The callable must not change input NDFrame
| (though pandas doesn't check it).
|
| .. versionadded:: 0.18.1
|
| A callable can be used as cond.
|
| other : scalar, NDFrame, or callable
| If other is callable, it is computed on the NDFrame and
| should return scalar or NDFrame.
| The callable must not change input NDFrame
| (though pandas doesn't check it).
|
| .. versionadded:: 0.18.1
|
| A callable can be used as other.
|
| inplace : boolean, default False
| Whether to perform the operation in place on the data
| axis : alignment axis if needed, default None
| level : alignment level if needed, default None
| try_cast : boolean, default False
| try to cast the result back to the input type (if possible),
| raise_on_error : boolean, default True
| Whether to raise on invalid data types (e.g. trying to where on
| strings)
|
| Returns
| -------
| wh : same type as caller
|
| Notes
| -----
| The mask method is an application of the if-then idiom. For each
| element in the calling DataFrame, if ``cond`` is ``False`` the
| element is used; otherwise the corresponding element from the DataFrame
| ``other`` is used.
|
| The signature for :func:`DataFrame.where` differs from
| :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
| ``np.where(m, df1, df2)``.
|
| For further details and examples see the ``mask`` documentation in
| :ref:`indexing <indexing.where_mask>`.
|
| Examples
| --------
| >>> s = pd.Series(range(5))
| >>> s.where(s > 0)
| 0 NaN
| 1 1.0
| 2 2.0
| 3 3.0
| 4 4.0
|
| >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
| >>> m = df % 3 == 0
| >>> df.where(m, -df)
| A B
| 0 0 -1
| 1 -2 3
| 2 -4 -5
| 3 6 -7
| 4 -8 9
| >>> df.where(m, -df) == np.where(m, df, -df)
| A B
| 0 True True
| 1 True True
| 2 True True
| 3 True True
| 4 True True
| >>> df.where(m, -df) == df.mask(~m, -df)
| A B
| 0 True True
| 1 True True
| 2 True True
| 3 True True
| 4 True True
|
| See Also
| --------
| :func:`DataFrame.where`
|
| notnull(self)
| Return a boolean same-sized object indicating if the values are
| not null.
|
| See Also
| --------
| isnull : boolean inverse of notnull
|
| pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, **kwargs)
| Percent change over given number of periods.
|
| Parameters
| ----------
| periods : int, default 1
| Periods to shift for forming percent change
| fill_method : str, default 'pad'
| How to handle NAs before computing percent changes
| limit : int, default None
| The number of consecutive NAs to fill before stopping
| freq : DateOffset, timedelta, or offset alias string, optional
| Increment to use from time series API (e.g. 'M' or BDay())
|
| Returns
| -------
| chg : NDFrame
|
| Notes
| -----
|
| By default, the percentage change is calculated along the stat
| axis: 0, or ``Index``, for ``DataFrame`` and 1, or ``minor`` for
| ``Panel``. You can change this with the ``axis`` keyword argument.
|
| pipe(self, func, *args, **kwargs)
| Apply func(self, \*args, \*\*kwargs)
|
| .. versionadded:: 0.16.2
|
| Parameters
| ----------
| func : function
| function to apply to the NDFrame.
| ``args``, and ``kwargs`` are passed into ``func``.
| Alternatively a ``(callable, data_keyword)`` tuple where
| ``data_keyword`` is a string indicating the keyword of
| ``callable`` that expects the NDFrame.
| args : positional arguments passed into ``func``.
| kwargs : a dictionary of keyword arguments passed into ``func``.
|
| Returns
| -------
| object : the return type of ``func``.
|
| Notes
| -----
|
| Use ``.pipe`` when chaining together functions that expect
 |      Series or DataFrames. Instead of writing
|
| >>> f(g(h(df), arg1=a), arg2=b, arg3=c)
|
| You can write
|
| >>> (df.pipe(h)
| ... .pipe(g, arg1=a)
| ... .pipe(f, arg2=b, arg3=c)
| ... )
|
| If you have a function that takes the data as (say) the second
| argument, pass a tuple indicating which keyword expects the
| data. For example, suppose ``f`` takes its data as ``arg2``:
|
| >>> (df.pipe(h)
| ... .pipe(g, arg1=a)
| ... .pipe((f, 'arg2'), arg1=a, arg3=c)
| ... )
|
| See Also
| --------
| pandas.DataFrame.apply
| pandas.DataFrame.applymap
| pandas.Series.map
|
| pop(self, item)
| Return item and drop from frame. Raise KeyError if not found.
|
| rank(self, axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False)
| Compute numerical data ranks (1 through n) along axis. Equal values are
| assigned a rank that is the average of the ranks of those values
|
| Parameters
| ----------
| axis: {0 or 'index', 1 or 'columns'}, default 0
| index to direct ranking
| method : {'average', 'min', 'max', 'first', 'dense'}
| * average: average rank of group
| * min: lowest rank in group
| * max: highest rank in group
| * first: ranks assigned in order they appear in the array
| * dense: like 'min', but rank always increases by 1 between groups
| numeric_only : boolean, default None
| Include only float, int, boolean data. Valid only for DataFrame or
| Panel objects
| na_option : {'keep', 'top', 'bottom'}
| * keep: leave NA values where they are
| * top: smallest rank if ascending
| * bottom: smallest rank if descending
| ascending : boolean, default True
| False for ranks by high (1) to low (N)
| pct : boolean, default False
| Computes percentage rank of data
|
| Returns
| -------
| ranks : same type as caller
|
| reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None)
| Return an object with matching indices to myself.
|
| Parameters
| ----------
| other : Object
| method : string or None
| copy : boolean, default True
| limit : int, default None
| Maximum number of consecutive labels to fill for inexact matches.
| tolerance : optional
| Maximum distance between labels of the other object and this
| object for inexact matches.
|
| .. versionadded:: 0.17.0
|
| Notes
| -----
| Like calling s.reindex(index=other.index, columns=other.columns,
| method=...)
|
| Returns
| -------
| reindexed : same as input
|
| rename_axis(self, mapper, axis=0, copy=True, inplace=False)
| Alter index and / or columns using input function or functions.
 |      A scalar or list-like for ``mapper`` will alter the ``Index.name``
| or ``MultiIndex.names`` attribute.
| A function or dict for ``mapper`` will alter the labels.
| Function / dict values must be unique (1-to-1). Labels not contained in
| a dict / Series will be left as-is.
|
| Parameters
| ----------
| mapper : scalar, list-like, dict-like or function, optional
| axis : int or string, default 0
| copy : boolean, default True
| Also copy underlying data
| inplace : boolean, default False
|
| Returns
| -------
| renamed : type of caller
|
| See Also
| --------
| pandas.NDFrame.rename
| pandas.Index.rename
|
| Examples
| --------
| >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
| >>> df.rename_axis("foo") # scalar, alters df.index.name
| A B
| foo
| 0 1 4
| 1 2 5
| 2 3 6
| >>> df.rename_axis(lambda x: 2 * x) # function: alters labels
| A B
| 0 1 4
| 2 2 5
| 4 3 6
| >>> df.rename_axis({"A": "ehh", "C": "see"}, axis="columns") # mapping
| ehh B
| 0 1 4
| 1 2 5
| 2 3 6
|
| replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None)
| Replace values given in 'to_replace' with 'value'.
|
| Parameters
| ----------
| to_replace : str, regex, list, dict, Series, numeric, or None
|
| * str or regex:
|
| - str: string exactly matching `to_replace` will be replaced
| with `value`
| - regex: regexs matching `to_replace` will be replaced with
| `value`
|
| * list of str, regex, or numeric:
|
| - First, if `to_replace` and `value` are both lists, they
| **must** be the same length.
| - Second, if ``regex=True`` then all of the strings in **both**
| lists will be interpreted as regexs otherwise they will match
| directly. This doesn't matter much for `value` since there
| are only a few possible substitution regexes you can use.
| - str and regex rules apply as above.
|
| * dict:
|
| - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as
| follows: look in column 'a' for the value 'b' and replace it
| with nan. You can nest regular expressions as well. Note that
| column names (the top-level dictionary keys in a nested
| dictionary) **cannot** be regular expressions.
| - Keys map to column names and values map to substitution
| values. You can treat this as a special case of passing two
| lists except that you are specifying the column to search in.
|
| * None:
|
| - This means that the ``regex`` argument must be a string,
| compiled regular expression, or list, dict, ndarray or Series
| of such elements. If `value` is also ``None`` then this
| **must** be a nested dictionary or ``Series``.
|
| See the examples section for examples of each of these.
| value : scalar, dict, list, str, regex, default None
| Value to use to fill holes (e.g. 0), alternately a dict of values
| specifying which value to use for each column (columns not in the
| dict will not be filled). Regular expressions, strings and lists or
| dicts of such objects are also allowed.
| inplace : boolean, default False
| If True, in place. Note: this will modify any
 |          other views on this object (e.g. a column from a DataFrame).
| Returns the caller if this is True.
| limit : int, default None
| Maximum size gap to forward or backward fill
| regex : bool or same types as `to_replace`, default False
| Whether to interpret `to_replace` and/or `value` as regular
| expressions. If this is ``True`` then `to_replace` *must* be a
| string. Otherwise, `to_replace` must be ``None`` because this
| parameter will be interpreted as a regular expression or a list,
| dict, or array of regular expressions.
| method : string, optional, {'pad', 'ffill', 'bfill'}
 |          The method to use for replacement, when ``to_replace`` is a
| ``list``.
|
| See Also
| --------
| NDFrame.reindex
| NDFrame.asfreq
| NDFrame.fillna
|
| Returns
| -------
| filled : NDFrame
|
| Raises
| ------
| AssertionError
| * If `regex` is not a ``bool`` and `to_replace` is not ``None``.
| TypeError
| * If `to_replace` is a ``dict`` and `value` is not a ``list``,
| ``dict``, ``ndarray``, or ``Series``
| * If `to_replace` is ``None`` and `regex` is not compilable into a
| regular expression or is a list, dict, ndarray, or Series.
| ValueError
| * If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but
| they are not the same length.
|
| Notes
| -----
| * Regex substitution is performed under the hood with ``re.sub``. The
| rules for substitution for ``re.sub`` are the same.
| * Regular expressions will only substitute on strings, meaning you
| cannot provide, for example, a regular expression matching floating
| point numbers and expect the columns in your frame that have a
| numeric dtype to be matched. However, if those floating point numbers
| *are* strings, then you can do this.
| * This method has *a lot* of options. You are encouraged to experiment
| and play with this method to gain intuition about how it works.
|
| resample(self, rule, how=None, axis=0, fill_method=None, closed=None, label=None, convention='start', kind=None, loffset=None, limit=None, base=0, on=None, level=None)
| Convenience method for frequency conversion and resampling of time
| series. Object must have a datetime-like index (DatetimeIndex,
| PeriodIndex, or TimedeltaIndex), or pass datetime-like values
| to the on or level keyword.
|
| Parameters
| ----------
| rule : string
| the offset string or object representing target conversion
| axis : int, optional, default 0
| closed : {'right', 'left'}
| Which side of bin interval is closed
| label : {'right', 'left'}
| Which bin edge label to label bucket with
| convention : {'start', 'end', 's', 'e'}
| loffset : timedelta
| Adjust the resampled time labels
| base : int, default 0
| For frequencies that evenly subdivide 1 day, the "origin" of the
| aggregated intervals. For example, for '5min' frequency, base could
| range from 0 through 4. Defaults to 0
| on : string, optional
| For a DataFrame, column to use instead of index for resampling.
| Column must be datetime-like.
|
| .. versionadded:: 0.19.0
|
| level : string or int, optional
| For a MultiIndex, level (name or number) to use for
| resampling. Level must be datetime-like.
|
| .. versionadded:: 0.19.0
|
| To learn more about the offset strings, please see `this link
| <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
|
| Examples
| --------
|
| Start by creating a series with 9 one minute timestamps.
|
| >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
| >>> series = pd.Series(range(9), index=index)
| >>> series
| 2000-01-01 00:00:00 0
| 2000-01-01 00:01:00 1
| 2000-01-01 00:02:00 2
| 2000-01-01 00:03:00 3
| 2000-01-01 00:04:00 4
| 2000-01-01 00:05:00 5
| 2000-01-01 00:06:00 6
| 2000-01-01 00:07:00 7
| 2000-01-01 00:08:00 8
| Freq: T, dtype: int64
|
| Downsample the series into 3 minute bins and sum the values
| of the timestamps falling into a bin.
|
| >>> series.resample('3T').sum()
| 2000-01-01 00:00:00 3
| 2000-01-01 00:03:00 12
| 2000-01-01 00:06:00 21
| Freq: 3T, dtype: int64
|
| Downsample the series into 3 minute bins as above, but label each
| bin using the right edge instead of the left. Please note that the
| value in the bucket used as the label is not included in the bucket,
| which it labels. For example, in the original series the
| bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
 |      value in the resampled bucket with the label ``2000-01-01 00:03:00``
| does not include 3 (if it did, the summed value would be 6, not 3).
| To include this value close the right side of the bin interval as
| illustrated in the example below this one.
|
| >>> series.resample('3T', label='right').sum()
| 2000-01-01 00:03:00 3
| 2000-01-01 00:06:00 12
| 2000-01-01 00:09:00 21
| Freq: 3T, dtype: int64
|
| Downsample the series into 3 minute bins as above, but close the right
| side of the bin interval.
|
| >>> series.resample('3T', label='right', closed='right').sum()
| 2000-01-01 00:00:00 0
| 2000-01-01 00:03:00 6
| 2000-01-01 00:06:00 15
| 2000-01-01 00:09:00 15
| Freq: 3T, dtype: int64
|
| Upsample the series into 30 second bins.
|
| >>> series.resample('30S').asfreq()[0:5] #select first 5 rows
| 2000-01-01 00:00:00 0
| 2000-01-01 00:00:30 NaN
| 2000-01-01 00:01:00 1
| 2000-01-01 00:01:30 NaN
| 2000-01-01 00:02:00 2
| Freq: 30S, dtype: float64
|
| Upsample the series into 30 second bins and fill the ``NaN``
| values using the ``pad`` method.
|
| >>> series.resample('30S').pad()[0:5]
| 2000-01-01 00:00:00 0
| 2000-01-01 00:00:30 0
| 2000-01-01 00:01:00 1
| 2000-01-01 00:01:30 1
| 2000-01-01 00:02:00 2
| Freq: 30S, dtype: int64
|
| Upsample the series into 30 second bins and fill the
| ``NaN`` values using the ``bfill`` method.
|
| >>> series.resample('30S').bfill()[0:5]
| 2000-01-01 00:00:00 0
| 2000-01-01 00:00:30 1
| 2000-01-01 00:01:00 1
| 2000-01-01 00:01:30 2
| 2000-01-01 00:02:00 2
| Freq: 30S, dtype: int64
|
| Pass a custom function via ``apply``
|
| >>> def custom_resampler(array_like):
| ... return np.sum(array_like)+5
|
| >>> series.resample('3T').apply(custom_resampler)
| 2000-01-01 00:00:00 8
| 2000-01-01 00:03:00 17
| 2000-01-01 00:06:00 26
| Freq: 3T, dtype: int64
|
| sample(self, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None)
| Returns a random sample of items from an axis of object.
|
| .. versionadded:: 0.16.1
|
| Parameters
| ----------
| n : int, optional
| Number of items from axis to return. Cannot be used with `frac`.
| Default = 1 if `frac` = None.
| frac : float, optional
| Fraction of axis items to return. Cannot be used with `n`.
| replace : boolean, optional
| Sample with or without replacement. Default = False.
| weights : str or ndarray-like, optional
| Default 'None' results in equal probability weighting.
| If passed a Series, will align with target object on index. Index
| values in weights not found in sampled object will be ignored and
| index values in sampled object not in weights will be assigned
| weights of zero.
| If called on a DataFrame, will accept the name of a column
| when axis = 0.
| Unless weights are a Series, weights must be same length as axis
| being sampled.
| If weights do not sum to 1, they will be normalized to sum to 1.
| Missing values in the weights column will be treated as zero.
| inf and -inf values not allowed.
| random_state : int or numpy.random.RandomState, optional
| Seed for the random number generator (if int), or numpy RandomState
| object.
| axis : int or string, optional
| Axis to sample. Accepts axis number or name. Default is stat axis
| for given data type (0 for Series and DataFrames, 1 for Panels).
|
| Returns
| -------
| A new object of same type as caller.
|
| Examples
| --------
|
| Generate an example ``Series`` and ``DataFrame``:
|
| >>> s = pd.Series(np.random.randn(50))
| >>> s.head()
| 0 -0.038497
| 1 1.820773
| 2 -0.972766
| 3 -1.598270
| 4 -1.095526
| dtype: float64
| >>> df = pd.DataFrame(np.random.randn(50, 4), columns=list('ABCD'))
| >>> df.head()
| A B C D
| 0 0.016443 -2.318952 -0.566372 -1.028078
| 1 -1.051921 0.438836 0.658280 -0.175797
| 2 -1.243569 -0.364626 -0.215065 0.057736
| 3 1.768216 0.404512 -0.385604 -1.457834
| 4 1.072446 -1.137172 0.314194 -0.046661
|
| Next extract a random sample from both of these objects...
|
| 3 random elements from the ``Series``:
|
| >>> s.sample(n=3)
| 27 -0.994689
| 55 -1.049016
| 67 -0.224565
| dtype: float64
|
| And a random 10% of the ``DataFrame`` with replacement:
|
| >>> df.sample(frac=0.1, replace=True)
| A B C D
| 35 1.981780 0.142106 1.817165 -0.290805
| 49 -1.336199 -0.448634 -0.789640 0.217116
| 40 0.823173 -0.078816 1.009536 1.015108
| 15 1.421154 -0.055301 -1.922594 -0.019696
| 6 -0.148339 0.832938 1.787600 -1.383767
|
| select(self, crit, axis=0)
| Return data corresponding to axis labels matching criteria
|
| Parameters
| ----------
| crit : function
| To be called on each index (label). Should return True or False
| axis : int
|
| Returns
| -------
| selection : type of caller
|
| set_axis(self, axis, labels)
 |      public version of axis assignment
|
| slice_shift(self, periods=1, axis=0)
| Equivalent to `shift` without copying data. The shifted data will
| not include the dropped periods and the shifted axis will be smaller
| than the original.
|
| Parameters
| ----------
| periods : int
| Number of periods to move, can be positive or negative
|
| Notes
| -----
| While the `slice_shift` is faster than `shift`, you may pay for it
| later during alignment.
|
| Returns
| -------
| shifted : same type as caller
|
| squeeze(self, **kwargs)
| Squeeze length 1 dimensions.
|
| swapaxes(self, axis1, axis2, copy=True)
| Interchange axes and swap values axes appropriately
|
| Returns
| -------
| y : same as input
|
| tail(self, n=5)
| Returns last n rows
|
| to_clipboard(self, excel=None, sep=None, **kwargs)
| Attempt to write text representation of object to the system clipboard
| This can be pasted into Excel, for example.
|
| Parameters
| ----------
| excel : boolean, defaults to True
| if True, use the provided separator, writing in a csv
| format for allowing easy pasting into excel.
| if False, write a string representation of the object
| to the clipboard
| sep : optional, defaults to tab
| other keywords are passed to to_csv
|
| Notes
| -----
| Requirements for your platform
| - Linux: xclip, or xsel (with gtk or PyQt4 modules)
| - Windows: none
| - OS X: none
|
| to_dense(self)
| Return dense representation of NDFrame (as opposed to sparse)
|
| to_hdf(self, path_or_buf, key, **kwargs)
| Write the contained data to an HDF5 file using HDFStore.
|
| Parameters
| ----------
| path_or_buf : the path (string) or HDFStore object
| key : string
 |          identifier for the group in the store
| mode : optional, {'a', 'w', 'r+'}, default 'a'
|
| ``'w'``
| Write; a new file is created (an existing file with the same
| name would be deleted).
| ``'a'``
| Append; an existing file is opened for reading and writing,
| and if the file does not exist it is created.
| ``'r+'``
| It is similar to ``'a'``, but the file must already exist.
| format : 'fixed(f)|table(t)', default is 'fixed'
| fixed(f) : Fixed format
| Fast writing/reading. Not-appendable, nor searchable
| table(t) : Table format
| Write as a PyTables Table structure which may perform
| worse but allow more flexible operations like searching
| / selecting subsets of the data
| append : boolean, default False
| For Table formats, append the input data to the existing
| data_columns : list of columns, or True, default None
| List of columns to create as indexed data columns for on-disk
| queries, or True to use all columns. By default only the axes
| of the object are indexed. See `here
| <http://pandas.pydata.org/pandas-docs/stable/io.html#query-via-data-columns>`__.
|
| Applicable only to format='table'.
| complevel : int, 1-9, default 0
| If a complib is specified compression will be applied
| where possible
| complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
| If complevel is > 0 apply compression to objects written
| in the store wherever possible
| fletcher32 : bool, default False
| If applying compression use the fletcher32 checksum
| dropna : boolean, default False.
| If true, ALL nan rows will not be written to store.
|
| to_json(self, path_or_buf=None, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False)
| Convert the object to a JSON string.
|
| Note NaN's and None will be converted to null and datetime objects
| will be converted to UNIX timestamps.
|
| Parameters
| ----------
| path_or_buf : the path or buffer to write the result string
| if this is None, return a StringIO of the converted string
| orient : string
|
| * Series
|
| - default is 'index'
| - allowed values are: {'split','records','index'}
|
| * DataFrame
|
| - default is 'columns'
| - allowed values are:
| {'split','records','index','columns','values'}
|
| * The format of the JSON string
|
| - split : dict like
| {index -> [index], columns -> [columns], data -> [values]}
| - records : list like
| [{column -> value}, ... , {column -> value}]
| - index : dict like {index -> {column -> value}}
| - columns : dict like {column -> {index -> value}}
| - values : just the values array
|
| date_format : {'epoch', 'iso'}
| Type of date conversion. `epoch` = epoch milliseconds,
 |          ``iso`` = ISO8601, default is epoch.
| double_precision : The number of decimal places to use when encoding
| floating point values, default 10.
| force_ascii : force encoded string to be ASCII, default True.
| date_unit : string, default 'ms' (milliseconds)
| The time unit to encode to, governs timestamp and ISO8601
| precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
| microsecond, and nanosecond respectively.
| default_handler : callable, default None
| Handler to call if object cannot otherwise be converted to a
| suitable format for JSON. Should receive a single argument which is
| the object to convert and return a serialisable object.
 |      lines : boolean, default False
| If 'orient' is 'records' write out line delimited json format. Will
| throw ValueError if incorrect 'orient' since others are not list
| like.
|
| .. versionadded:: 0.19.0
|
|
| Returns
| -------
| same type as input object with filtered info axis
|
| to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs)
| msgpack (serialize) object to input file path
|
| THIS IS AN EXPERIMENTAL LIBRARY and the storage format
| may not be stable until a future release.
|
| Parameters
| ----------
| path : string File path, buffer-like, or None
| if None, return generated string
| append : boolean whether to append to an existing msgpack
| (default is False)
| compress : type of compressor (zlib or blosc), default to None (no
| compression)
|
| to_pickle(self, path)
| Pickle (serialize) object to input file path.
|
| Parameters
| ----------
| path : string
| File path
|
| to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None)
| Write records stored in a DataFrame to a SQL database.
|
| Parameters
| ----------
| name : string
| Name of SQL table
| con : SQLAlchemy engine or DBAPI2 connection (legacy mode)
| Using SQLAlchemy makes it possible to use any DB supported by that
| library. If a DBAPI2 object, only sqlite3 is supported.
| flavor : 'sqlite', default None
| DEPRECATED: this parameter will be removed in a future version,
| as 'sqlite' is the only supported option if SQLAlchemy is not
| installed.
| schema : string, default None
| Specify the schema (if database flavor supports this). If None, use
| default schema.
| if_exists : {'fail', 'replace', 'append'}, default 'fail'
| - fail: If table exists, do nothing.
| - replace: If table exists, drop it, recreate it, and insert data.
| - append: If table exists, insert data. Create if does not exist.
| index : boolean, default True
| Write DataFrame index as a column.
| index_label : string or sequence, default None
| Column label for index column(s). If None is given (default) and
| `index` is True, then the index names are used.
| A sequence should be given if the DataFrame uses MultiIndex.
| chunksize : int, default None
| If not None, then rows will be written in batches of this size at a
| time. If None, all rows will be written at once.
| dtype : dict of column name to SQL type, default None
| Optional specifying the datatype for columns. The SQL type should
| be a SQLAlchemy type, or a string for sqlite3 fallback connection.
|
| to_xarray(self)
| Return an xarray object from the pandas object.
|
| Returns
| -------
| a DataArray for a Series
| a Dataset for a DataFrame
| a DataArray for higher dims
|
| Examples
| --------
| >>> df = pd.DataFrame({'A' : [1, 1, 2],
| 'B' : ['foo', 'bar', 'foo'],
| 'C' : np.arange(4.,7)})
| >>> df
| A B C
| 0 1 foo 4.0
| 1 1 bar 5.0
| 2 2 foo 6.0
|
| >>> df.to_xarray()
| <xarray.Dataset>
| Dimensions: (index: 3)
| Coordinates:
| * index (index) int64 0 1 2
| Data variables:
| A (index) int64 1 1 2
| B (index) object 'foo' 'bar' 'foo'
| C (index) float64 4.0 5.0 6.0
|
| >>> df = pd.DataFrame({'A' : [1, 1, 2],
| 'B' : ['foo', 'bar', 'foo'],
| 'C' : np.arange(4.,7)}
| ).set_index(['B','A'])
| >>> df
| C
| B A
| foo 1 4.0
| bar 1 5.0
| foo 2 6.0
|
| >>> df.to_xarray()
| <xarray.Dataset>
| Dimensions: (A: 2, B: 2)
| Coordinates:
| * B (B) object 'bar' 'foo'
| * A (A) int64 1 2
| Data variables:
| C (B, A) float64 5.0 nan 4.0 6.0
|
| >>> p = pd.Panel(np.arange(24).reshape(4,3,2),
| items=list('ABCD'),
| major_axis=pd.date_range('20130101', periods=3),
| minor_axis=['first', 'second'])
| >>> p
| <class 'pandas.core.panel.Panel'>
| Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis)
| Items axis: A to D
| Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00
| Minor_axis axis: first to second
|
| >>> p.to_xarray()
| <xarray.DataArray (items: 4, major_axis: 3, minor_axis: 2)>
| array([[[ 0, 1],
| [ 2, 3],
| [ 4, 5]],
| [[ 6, 7],
| [ 8, 9],
| [10, 11]],
| [[12, 13],
| [14, 15],
| [16, 17]],
| [[18, 19],
| [20, 21],
| [22, 23]]])
| Coordinates:
| * items (items) object 'A' 'B' 'C' 'D'
| * major_axis (major_axis) datetime64[ns] 2013-01-01 2013-01-02 2013-01-03 # noqa
| * minor_axis (minor_axis) object 'first' 'second'
|
| Notes
| -----
| See the `xarray docs <http://xarray.pydata.org/en/stable/>`__
|
| truncate(self, before=None, after=None, axis=None, copy=True)
| Truncates a sorted NDFrame before and/or after some particular
| index value. If the axis contains only datetime values, before/after
| parameters are converted to datetime values.
|
| Parameters
| ----------
| before : date
| Truncate before index value
| after : date
| Truncate after index value
| axis : the truncation axis, defaults to the stat axis
| copy : boolean, default is True,
| return a copy of the truncated section
|
| Returns
| -------
| truncated : type of caller
|
| tshift(self, periods=1, freq=None, axis=0)
| Shift the time index, using the index's frequency if available.
|
| Parameters
| ----------
| periods : int
| Number of periods to move, can be positive or negative
| freq : DateOffset, timedelta, or time rule string, default None
| Increment to use from the tseries module or time rule (e.g. 'EOM')
| axis : int or basestring
| Corresponds to the axis that contains the Index
|
| Notes
| -----
| If freq is not specified then tries to use the freq or inferred_freq
| attributes of the index. If neither of those attributes exist, a
| ValueError is thrown
|
| Returns
| -------
| shifted : NDFrame
|
| tz_convert(self, tz, axis=0, level=None, copy=True)
| Convert tz-aware axis to target time zone.
|
| Parameters
| ----------
| tz : string or pytz.timezone object
| axis : the axis to convert
| level : int, str, default None
 |          If axis is a MultiIndex, convert a specific level. Otherwise
| must be None
| copy : boolean, default True
| Also make a copy of the underlying data
|
| Returns
| -------
|
| Raises
| ------
| TypeError
| If the axis is tz-naive.
|
| tz_localize(self, tz, axis=0, level=None, copy=True, ambiguous='raise')
| Localize tz-naive TimeSeries to target time zone.
|
| Parameters
| ----------
| tz : string or pytz.timezone object
| axis : the axis to localize
| level : int, str, default None
 |          If axis is a MultiIndex, localize a specific level. Otherwise
| must be None
| copy : boolean, default True
| Also make a copy of the underlying data
| ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
| - 'infer' will attempt to infer fall dst-transition hours based on
| order
| - bool-ndarray where True signifies a DST time, False designates
| a non-DST time (note that this flag is only applicable for
| ambiguous times)
| - 'NaT' will return NaT where there are ambiguous times
| - 'raise' will raise an AmbiguousTimeError if there are ambiguous
| times
| infer_dst : boolean, default False (DEPRECATED)
| Attempt to infer fall dst-transition hours based on order
|
| Returns
| -------
|
| Raises
| ------
| TypeError
| If the TimeSeries is tz-aware and tz is not None.
|
| where(self, cond, other=nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True)
| Return an object of same shape as self and whose corresponding
| entries are from self where cond is True and otherwise are from
| other.
|
| Parameters
| ----------
| cond : boolean NDFrame, array or callable
| If cond is callable, it is computed on the NDFrame and
| should return boolean NDFrame or array.
| The callable must not change input NDFrame
| (though pandas doesn't check it).
|
| .. versionadded:: 0.18.1
|
| A callable can be used as cond.
|
| other : scalar, NDFrame, or callable
| If other is callable, it is computed on the NDFrame and
| should return scalar or NDFrame.
| The callable must not change input NDFrame
| (though pandas doesn't check it).
|
| .. versionadded:: 0.18.1
|
| A callable can be used as other.
|
| inplace : boolean, default False
| Whether to perform the operation in place on the data
| axis : alignment axis if needed, default None
| level : alignment level if needed, default None
| try_cast : boolean, default False
| try to cast the result back to the input type (if possible),
| raise_on_error : boolean, default True
| Whether to raise on invalid data types (e.g. trying to where on
| strings)
|
| Returns
| -------
| wh : same type as caller
|
| Notes
| -----
| The where method is an application of the if-then idiom. For each
| element in the calling DataFrame, if ``cond`` is ``True`` the
| element is used; otherwise the corresponding element from the DataFrame
| ``other`` is used.
|
| The signature for :func:`DataFrame.where` differs from
| :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
| ``np.where(m, df1, df2)``.
|
| For further details and examples see the ``where`` documentation in
| :ref:`indexing <indexing.where_mask>`.
|
| Examples
| --------
| >>> s = pd.Series(range(5))
| >>> s.where(s > 0)
| 0 NaN
| 1 1.0
| 2 2.0
| 3 3.0
| 4 4.0
|
| >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
| >>> m = df % 3 == 0
| >>> df.where(m, -df)
| A B
| 0 0 -1
| 1 -2 3
| 2 -4 -5
| 3 6 -7
| 4 -8 9
| >>> df.where(m, -df) == np.where(m, df, -df)
| A B
| 0 True True
| 1 True True
| 2 True True
| 3 True True
| 4 True True
| >>> df.where(m, -df) == df.mask(~m, -df)
| A B
| 0 True True
| 1 True True
| 2 True True
| 3 True True
| 4 True True
|
| See Also
| --------
| :func:`DataFrame.mask`
|
| xs(self, key, axis=0, level=None, drop_level=True)
| Returns a cross-section (row(s) or column(s)) from the
| Series/DataFrame. Defaults to cross-section on the rows (axis=0).
|
| Parameters
| ----------
| key : object
| Some label contained in the index, or partially in a MultiIndex
| axis : int, default 0
| Axis to retrieve cross-section on
| level : object, defaults to first n levels (n=1 or len(key))
| In case of a key partially contained in a MultiIndex, indicate
| which levels are used. Levels can be referred by label or position.
| drop_level : boolean, default True
| If False, returns object with same levels as self.
|
| Examples
| --------
| >>> df
| A B C
| a 4 5 2
| b 4 0 9
| c 9 7 3
| >>> df.xs('a')
| A 4
| B 5
| C 2
| Name: a
| >>> df.xs('C', axis=1)
| a 2
| b 9
| c 3
| Name: C
|
| >>> df
| A B C D
| first second third
| bar one 1 4 1 8 9
| two 1 7 5 5 0
| baz one 1 6 6 8 0
| three 2 5 3 5 3
| >>> df.xs(('baz', 'three'))
| A B C D
| third
| 2 5 3 5 3
| >>> df.xs('one', level=1)
| A B C D
| first third
| bar 1 4 1 8 9
| baz 1 6 6 8 0
| >>> df.xs(('baz', 2), level=[0, 'third'])
| A B C D
| second
| three 5 3 5 3
|
| Returns
| -------
| xs : Series or DataFrame
|
| Notes
| -----
| xs is only for getting, not setting values.
|
| MultiIndex Slicers is a generic way to get/set values on any level or
| levels. It is a superset of xs functionality, see
| :ref:`MultiIndex Slicers <advanced.mi_slicers>`
|
| ----------------------------------------------------------------------
| Data descriptors inherited from pandas.core.generic.NDFrame:
|
| at
| Fast label-based scalar accessor
|
| Similarly to ``loc``, ``at`` provides **label** based scalar lookups.
| You can also set using these indexers.
|
| blocks
| Internal property, property synonym for as_blocks()
|
| empty
| True if NDFrame is entirely empty [no items], meaning any of the
| axes are of length 0.
|
| Notes
| -----
| If NDFrame contains only NaNs, it is still not considered empty. See
| the example below.
|
| Examples
| --------
| An example of an actual empty DataFrame. Notice the index is empty:
|
| >>> df_empty = pd.DataFrame({'A' : []})
| >>> df_empty
| Empty DataFrame
| Columns: [A]
| Index: []
| >>> df_empty.empty
| True
|
| If we only have NaNs in our DataFrame, it is not considered empty! We
| will need to drop the NaNs to make the DataFrame empty:
|
| >>> df = pd.DataFrame({'A' : [np.nan]})
| >>> df
| A
| 0 NaN
| >>> df.empty
| False
| >>> df.dropna().empty
| True
|
| See also
| --------
| pandas.Series.dropna
| pandas.DataFrame.dropna
|
| iat
| Fast integer location scalar accessor.
|
| Similarly to ``iloc``, ``iat`` provides **integer** based lookups.
| You can also set using these indexers.
|
| iloc
| Purely integer-location based indexing for selection by position.
|
| ``.iloc[]`` is primarily integer position based (from ``0`` to
| ``length-1`` of the axis), but may also be used with a boolean
| array.
|
| Allowed inputs are:
|
| - An integer, e.g. ``5``.
| - A list or array of integers, e.g. ``[4, 3, 0]``.
| - A slice object with ints, e.g. ``1:7``.
| - A boolean array.
| - A ``callable`` function with one argument (the calling Series, DataFrame
| or Panel) and that returns valid output for indexing (one of the above)
|
| ``.iloc`` will raise ``IndexError`` if a requested indexer is
| out-of-bounds, except *slice* indexers which allow out-of-bounds
| indexing (this conforms with python/numpy *slice* semantics).
|
| See more at :ref:`Selection by Position <indexing.integer>`
|
| ix
| A primarily label-location based indexer, with integer position
| fallback.
|
| ``.ix[]`` supports mixed integer and label based access. It is
| primarily label based, but will fall back to integer positional
| access unless the corresponding axis is of integer type.
|
| ``.ix`` is the most general indexer and will support any of the
| inputs in ``.loc`` and ``.iloc``. ``.ix`` also supports floating
| point label schemes. ``.ix`` is exceptionally useful when dealing
 |      with mixed positional and label based hierarchical indexes.
|
| However, when an axis is integer based, ONLY label based access
| and not positional access is supported. Thus, in such cases, it's
| usually better to be explicit and use ``.iloc`` or ``.loc``.
|
| See more at :ref:`Advanced Indexing <advanced>`.
|
| loc
| Purely label-location based indexer for selection by label.
|
| ``.loc[]`` is primarily label based, but may also be used with a
| boolean array.
|
| Allowed inputs are:
|
| - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
| interpreted as a *label* of the index, and **never** as an
| integer position along the index).
| - A list or array of labels, e.g. ``['a', 'b', 'c']``.
| - A slice object with labels, e.g. ``'a':'f'`` (note that contrary
| to usual python slices, **both** the start and the stop are included!).
| - A boolean array.
| - A ``callable`` function with one argument (the calling Series, DataFrame
| or Panel) and that returns valid output for indexing (one of the above)
|
| ``.loc`` will raise a ``KeyError`` when the items are not found.
|
| See more at :ref:`Selection by Label <indexing.label>`
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from pandas.core.generic.NDFrame:
|
| is_copy = None
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.base.PandasObject:
|
| __dir__(self)
| Provide method name lookup and completion
| Only provide 'public' methods
|
| __sizeof__(self)
| Generates the total memory usage for a object that returns
| either a value or Series of values
|
| ----------------------------------------------------------------------
| Methods inherited from pandas.core.base.StringMixin:
|
| __bytes__(self)
| Return a string representation for a particular object.
|
| Invoked by bytes(obj) in py3 only.
| Yields a bytestring in both py2/py3.
|
| __repr__(self)
| Return a string representation for a particular object.
|
| Yields Bytestring in Py2, Unicode String in py3.
|
| __str__(self)
| Return a string representation for a particular Object
|
| Invoked by str(df) in both py2/py3.
| Yields Bytestring in Py2, Unicode String in py3.
In [4]:
# A list of strings becomes a Series with dtype 'object' and a default
# RangeIndex (0, 1, 2) — see the Out[4] rendering below.
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)
Out[4]:
0 Tiger
1 Bear
2 Moose
dtype: object
In [5]:
# A list of Python ints is stored with numeric dtype 'int64' (Out[5]).
numbers = [1, 2, 3]
pd.Series(numbers)
Out[5]:
0 1
1 2
2 3
dtype: int64
In [6]:
# With object dtype, a None entry is kept as None rather than being
# converted to NaN (Out[6]).
animals = ['Tiger', 'Bear', None]
pd.Series(animals)
Out[6]:
0 Tiger
1 Bear
2 None
dtype: object
In [7]:
# In numeric data, None forces an upcast to float64 and is stored as NaN
# (Out[7]) — pandas' missing-value marker for floats.
numbers = [1, 2, None]
pd.Series(numbers)
Out[7]:
0 1.0
1 2.0
2 NaN
dtype: float64
In [8]:
# NaN and None are different values: the equality comparison is False
# (Out[8]). (== with None is deliberate here — it's the demonstration.)
import numpy as np
np.nan == None
Out[8]:
False
In [9]:
# NaN is not equal even to itself (Out[9] is False), so == cannot be
# used to detect missing values.
np.nan == np.nan
Out[9]:
False
In [10]:
# Use np.isnan (or pandas' isnull) to test for NaN instead of == (Out[10]).
np.isnan(np.nan)
Out[10]:
True
In [12]:
# A dict becomes a Series whose index is the dict's keys and whose
# values are the dict's values (printed output below).
sports = {'a': 'b', 'c': 'd', 'e': 'f'}
s = pd.Series(sports)
print(s)
a b
c d
e f
dtype: object
In [13]:
# Passing index= explicitly overrides the default RangeIndex; labels are
# paired positionally with the data values (printed output below).
a = pd.Series(["d", "e", "f"], index=['a', 'b', 'c'])
print(a)
a d
b e
c f
dtype: object
Content source: jhonatancasale/learning-python
Similar notebooks: