Thanks to visit codestin.com
Credit goes to rabernat.github.io

Numpy and Matplotlib

These are two of the most fundamental parts of the scientific python "ecosystem". Most everything else is built on top of them.

In [3]:
import numpy as np

What did we just do? We imported a package. This brings new variables (mostly functions) into our interpreter. We access them as follows.

In [2]:
# find out what is in our namespace
dir()
Out[2]:
['In',
 'Out',
 '_',
 '__',
 '___',
 '__builtin__',
 '__builtins__',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_dh',
 '_i',
 '_i1',
 '_i2',
 '_ih',
 '_ii',
 '_iii',
 '_oh',
 '_sh',
 'exit',
 'get_ipython',
 'np',
 'quit']
In [3]:
# find out what's in numpy
dir(np)
Out[3]:
['ALLOW_THREADS',
 'BUFSIZE',
 'CLIP',
 'ComplexWarning',
 'DataSource',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'MachAr',
 'ModuleDeprecationWarning',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'PackageLoader',
 'RAISE',
 'RankWarning',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'VisibleDeprecationWarning',
 'WRAP',
 '_NoValue',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__doc__',
 '__file__',
 '__git_revision__',
 '__loader__',
 '__mkl_version__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_distributor_init',
 '_globals',
 '_import_tools',
 '_mat',
 'abs',
 'absolute',
 'absolute_import',
 'add',
 'add_docstring',
 'add_newdoc',
 'add_newdoc_ufunc',
 'add_newdocs',
 'alen',
 'all',
 'allclose',
 'alltrue',
 'alterdot',
 'amax',
 'amin',
 'angle',
 'any',
 'append',
 'apply_along_axis',
 'apply_over_axes',
 'arange',
 'arccos',
 'arccosh',
 'arcsin',
 'arcsinh',
 'arctan',
 'arctan2',
 'arctanh',
 'argmax',
 'argmin',
 'argpartition',
 'argsort',
 'argwhere',
 'around',
 'array',
 'array2string',
 'array_equal',
 'array_equiv',
 'array_repr',
 'array_split',
 'array_str',
 'asanyarray',
 'asarray',
 'asarray_chkfinite',
 'ascontiguousarray',
 'asfarray',
 'asfortranarray',
 'asmatrix',
 'asscalar',
 'atleast_1d',
 'atleast_2d',
 'atleast_3d',
 'average',
 'bartlett',
 'base_repr',
 'bench',
 'binary_repr',
 'bincount',
 'bitwise_and',
 'bitwise_not',
 'bitwise_or',
 'bitwise_xor',
 'blackman',
 'bmat',
 'bool',
 'bool8',
 'bool_',
 'broadcast',
 'broadcast_arrays',
 'broadcast_to',
 'busday_count',
 'busday_offset',
 'busdaycalendar',
 'byte',
 'byte_bounds',
 'bytes0',
 'bytes_',
 'c_',
 'can_cast',
 'cast',
 'cbrt',
 'cdouble',
 'ceil',
 'cfloat',
 'char',
 'character',
 'chararray',
 'choose',
 'clip',
 'clongdouble',
 'clongfloat',
 'column_stack',
 'common_type',
 'compare_chararrays',
 'compat',
 'complex',
 'complex128',
 'complex256',
 'complex64',
 'complex_',
 'complexfloating',
 'compress',
 'concatenate',
 'conj',
 'conjugate',
 'convolve',
 'copy',
 'copysign',
 'copyto',
 'core',
 'corrcoef',
 'correlate',
 'cos',
 'cosh',
 'count_nonzero',
 'cov',
 'cross',
 'csingle',
 'ctypeslib',
 'cumprod',
 'cumproduct',
 'cumsum',
 'datetime64',
 'datetime_as_string',
 'datetime_data',
 'deg2rad',
 'degrees',
 'delete',
 'deprecate',
 'deprecate_with_doc',
 'diag',
 'diag_indices',
 'diag_indices_from',
 'diagflat',
 'diagonal',
 'diff',
 'digitize',
 'disp',
 'divide',
 'division',
 'dot',
 'double',
 'dsplit',
 'dstack',
 'dtype',
 'e',
 'ediff1d',
 'einsum',
 'einsum_path',
 'emath',
 'empty',
 'empty_like',
 'equal',
 'errstate',
 'euler_gamma',
 'exp',
 'exp2',
 'expand_dims',
 'expm1',
 'extract',
 'eye',
 'fabs',
 'fastCopyAndTranspose',
 'fft',
 'fill_diagonal',
 'find_common_type',
 'finfo',
 'fix',
 'flatiter',
 'flatnonzero',
 'flexible',
 'flip',
 'fliplr',
 'flipud',
 'float',
 'float128',
 'float16',
 'float32',
 'float64',
 'float_',
 'float_power',
 'floating',
 'floor',
 'floor_divide',
 'fmax',
 'fmin',
 'fmod',
 'format_parser',
 'frexp',
 'frombuffer',
 'fromfile',
 'fromfunction',
 'fromiter',
 'frompyfunc',
 'fromregex',
 'fromstring',
 'full',
 'full_like',
 'fv',
 'generic',
 'genfromtxt',
 'geomspace',
 'get_array_wrap',
 'get_include',
 'get_printoptions',
 'getbufsize',
 'geterr',
 'geterrcall',
 'geterrobj',
 'gradient',
 'greater',
 'greater_equal',
 'half',
 'hamming',
 'hanning',
 'histogram',
 'histogram2d',
 'histogramdd',
 'hsplit',
 'hstack',
 'hypot',
 'i0',
 'identity',
 'iinfo',
 'imag',
 'in1d',
 'index_exp',
 'indices',
 'inexact',
 'inf',
 'info',
 'infty',
 'inner',
 'insert',
 'int',
 'int0',
 'int16',
 'int32',
 'int64',
 'int8',
 'int_',
 'int_asbuffer',
 'intc',
 'integer',
 'interp',
 'intersect1d',
 'intp',
 'invert',
 'ipmt',
 'irr',
 'is_busday',
 'isclose',
 'iscomplex',
 'iscomplexobj',
 'isfinite',
 'isfortran',
 'isinf',
 'isnan',
 'isneginf',
 'isposinf',
 'isreal',
 'isrealobj',
 'isscalar',
 'issctype',
 'issubclass_',
 'issubdtype',
 'issubsctype',
 'iterable',
 'ix_',
 'kaiser',
 'kron',
 'ldexp',
 'left_shift',
 'less',
 'less_equal',
 'lexsort',
 'lib',
 'linalg',
 'linspace',
 'little_endian',
 'load',
 'loads',
 'loadtxt',
 'log',
 'log10',
 'log1p',
 'log2',
 'logaddexp',
 'logaddexp2',
 'logical_and',
 'logical_not',
 'logical_or',
 'logical_xor',
 'logspace',
 'long',
 'longcomplex',
 'longdouble',
 'longfloat',
 'longlong',
 'lookfor',
 'ma',
 'mafromtxt',
 'mask_indices',
 'mat',
 'math',
 'matmul',
 'matrix',
 'matrixlib',
 'max',
 'maximum',
 'maximum_sctype',
 'may_share_memory',
 'mean',
 'median',
 'memmap',
 'meshgrid',
 'mgrid',
 'min',
 'min_scalar_type',
 'minimum',
 'mintypecode',
 'mirr',
 'mod',
 'modf',
 'moveaxis',
 'msort',
 'multiply',
 'nan',
 'nan_to_num',
 'nanargmax',
 'nanargmin',
 'nancumprod',
 'nancumsum',
 'nanmax',
 'nanmean',
 'nanmedian',
 'nanmin',
 'nanpercentile',
 'nanprod',
 'nanstd',
 'nansum',
 'nanvar',
 'nbytes',
 'ndarray',
 'ndenumerate',
 'ndfromtxt',
 'ndim',
 'ndindex',
 'nditer',
 'negative',
 'nested_iters',
 'newaxis',
 'nextafter',
 'nonzero',
 'not_equal',
 'nper',
 'npv',
 'numarray',
 'number',
 'obj2sctype',
 'object',
 'object0',
 'object_',
 'ogrid',
 'oldnumeric',
 'ones',
 'ones_like',
 'outer',
 'packbits',
 'pad',
 'partition',
 'percentile',
 'pi',
 'piecewise',
 'pkgload',
 'place',
 'pmt',
 'poly',
 'poly1d',
 'polyadd',
 'polyder',
 'polydiv',
 'polyfit',
 'polyint',
 'polymul',
 'polynomial',
 'polysub',
 'polyval',
 'power',
 'ppmt',
 'print_function',
 'prod',
 'product',
 'promote_types',
 'ptp',
 'put',
 'putmask',
 'pv',
 'r_',
 'rad2deg',
 'radians',
 'random',
 'rank',
 'rate',
 'ravel',
 'ravel_multi_index',
 'real',
 'real_if_close',
 'rec',
 'recarray',
 'recfromcsv',
 'recfromtxt',
 'reciprocal',
 'record',
 'remainder',
 'repeat',
 'require',
 'reshape',
 'resize',
 'restoredot',
 'result_type',
 'right_shift',
 'rint',
 'roll',
 'rollaxis',
 'roots',
 'rot90',
 'round',
 'round_',
 'row_stack',
 's_',
 'safe_eval',
 'save',
 'savetxt',
 'savez',
 'savez_compressed',
 'sctype2char',
 'sctypeDict',
 'sctypeNA',
 'sctypes',
 'searchsorted',
 'select',
 'set_numeric_ops',
 'set_printoptions',
 'set_string_function',
 'setbufsize',
 'setdiff1d',
 'seterr',
 'seterrcall',
 'seterrobj',
 'setxor1d',
 'shape',
 'shares_memory',
 'short',
 'show_config',
 'sign',
 'signbit',
 'signedinteger',
 'sin',
 'sinc',
 'single',
 'singlecomplex',
 'sinh',
 'size',
 'sometrue',
 'sort',
 'sort_complex',
 'source',
 'spacing',
 'split',
 'sqrt',
 'square',
 'squeeze',
 'stack',
 'std',
 'str',
 'str0',
 'str_',
 'string_',
 'subtract',
 'sum',
 'swapaxes',
 'take',
 'tan',
 'tanh',
 'tensordot',
 'test',
 'testing',
 'tile',
 'timedelta64',
 'trace',
 'transpose',
 'trapz',
 'tri',
 'tril',
 'tril_indices',
 'tril_indices_from',
 'trim_zeros',
 'triu',
 'triu_indices',
 'triu_indices_from',
 'true_divide',
 'trunc',
 'typeDict',
 'typeNA',
 'typecodes',
 'typename',
 'ubyte',
 'ufunc',
 'uint',
 'uint0',
 'uint16',
 'uint32',
 'uint64',
 'uint8',
 'uintc',
 'uintp',
 'ulonglong',
 'unicode',
 'unicode_',
 'union1d',
 'unique',
 'unpackbits',
 'unravel_index',
 'unsignedinteger',
 'unwrap',
 'ushort',
 'vander',
 'var',
 'vdot',
 'vectorize',
 'version',
 'void',
 'void0',
 'vsplit',
 'vstack',
 'warnings',
 'where',
 'who',
 'zeros',
 'zeros_like']
In [4]:
# find out what version we have
np.__version__
Out[4]:
'1.12.1'

The numpy documentation is crucial!

http://docs.scipy.org/doc/numpy/reference/

NDArrays

The core class is the numpy ndarray (n-dimensional array).

In [5]:
from IPython.display import Image
Image(url='http://docs.scipy.org/doc/numpy/_images/threefundamental.png')
Out[5]:
In [6]:
# create an array from a list
a = np.array([9,0,2,1,0])
In [7]:
# find out the datatype
a.dtype
Out[7]:
dtype('int64')
In [8]:
# find out the shape
a.shape
Out[8]:
(5,)
In [9]:
# what is the shape
type(a.shape)
Out[9]:
tuple
In [10]:
# another array with a different datatype and shape
b = np.array([[5,3,1,9],[9,2,3,0]], dtype=np.float64)
In [11]:
# check dtype and shape
b.dtype, b.shape
Out[11]:
(dtype('float64'), (2, 4))

Important Concept: The fastest varying dimension is the last dimension! The outer level of the hierarchy is the first dimension. (This is called "c-style" indexing)

More array creation

There are lots of ways to create arrays.

In [12]:
# create some uniform arrays
c = np.zeros((9,9))
d = np.ones((3,6,3), dtype=np.complex128)
e = np.full((3,3), np.pi)
e = np.ones_like(c)
f = np.zeros_like(d)
In [13]:
# create some ranges
np.arange(10)
Out[13]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [14]:
# arange is left inclusive, right exclusive
np.arange(2,4,0.25)
Out[14]:
array([ 2.  ,  2.25,  2.5 ,  2.75,  3.  ,  3.25,  3.5 ,  3.75])
In [15]:
# linearly spaced
np.linspace(2,4,20)
Out[15]:
array([ 2.        ,  2.10526316,  2.21052632,  2.31578947,  2.42105263,
        2.52631579,  2.63157895,  2.73684211,  2.84210526,  2.94736842,
        3.05263158,  3.15789474,  3.26315789,  3.36842105,  3.47368421,
        3.57894737,  3.68421053,  3.78947368,  3.89473684,  4.        ])
In [16]:
# log spaced
np.logspace(1,2,10)
Out[16]:
array([  10.        ,   12.91549665,   16.68100537,   21.5443469 ,
         27.82559402,   35.93813664,   46.41588834,   59.94842503,
         77.42636827,  100.        ])
In [17]:
# two dimensional grids
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.linspace(-np.pi, np.pi, 50)
xx, yy = np.meshgrid(x, y)
xx.shape, yy.shape
Out[17]:
((50, 100), (50, 100))

Indexing

Basic indexing is similar to lists

In [18]:
# get some individual elements of xx
xx[0,0], xx[-1,-1], xx[3,-5]
Out[18]:
(-6.2831853071795862, 6.2831853071795862, 5.7754531611448723)
In [19]:
# get some whole rows and columns
xx[0].shape, xx[:,-1].shape
Out[19]:
((100,), (50,))
In [20]:
# get some ranges
xx[3:10,30:40].shape
Out[20]:
(7, 10)

There are many advanced ways to index arrays. You can read about them in the manual. Here is one example.

In [21]:
# use a boolean array as an index
idx = xx<0
yy[idx].shape
Out[21]:
(2500,)
In [22]:
# the array got flattened
xx.ravel().shape
Out[22]:
(5000,)

Array Operations

There are a huge number of operations available on arrays. All the familiar arithemtic operators are applied on an element-by-element basis.

Basic Math

In [23]:
f = np.sin(xx) * np.cos(0.5*yy)

At this point you might be getting curious what these arrays "look" like. So we need to introduce some visualization.

In [24]:
from matplotlib import pyplot as plt
%matplotlib inline
In [25]:
plt.pcolormesh(f)
Out[25]:

Manipulating array dimensions

In [26]:
# transpose
plt.pcolormesh(f.T)
Out[26]:
In [27]:
# reshape an array (wrong size)
g = np.reshape(f, (8,9))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
 in ()
      1 # reshape an array (wrong size)
----> 2 g = np.reshape(f, (8,9))

/anaconda/lib/python3.6/site-packages/numpy/core/fromnumeric.py in reshape(a, newshape, order)
    230            [5, 6]])
    231     """
--> 232     return _wrapfunc(a, 'reshape', newshape, order=order)
    233 
    234 

/anaconda/lib/python3.6/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     55 def _wrapfunc(obj, method, *args, **kwds):
     56     try:
---> 57         return getattr(obj, method)(*args, **kwds)
     58 
     59     # An AttributeError occurs if the object does not have

ValueError: cannot reshape array of size 5000 into shape (8,9)
In [28]:
# reshape an array (right size) and mess it up
print(f.size)
g = np.reshape(f, (200,25))
plt.pcolormesh(g)
5000
Out[28]:
In [29]:
# tile an array
plt.pcolormesh(np.tile(f,(6,1)))
Out[29]:

Broadcasting

Broadcasting is an efficient way to multiply arrays of different sizes

In [30]:
Image(url='http://scipy-lectures.github.io/_images/numpy_broadcasting.png',
     width=720)
Out[30]:
In [31]:
# multiply f by x
print(f.shape, x.shape)
g = f * x
print(g.shape)
(50, 100) (100,)
(50, 100)
In [32]:
# multiply f by y
print(f.shape, y.shape)
h = f * y
print(h.shape)
(50, 100) (50,)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
 in ()
      1 # multiply f by y
      2 print(f.shape, y.shape)
----> 3 h = f * y
      4 print(h.shape)

ValueError: operands could not be broadcast together with shapes (50,100) (50,) 
In [33]:
# use newaxis special syntax
h = f * y[:,np.newaxis]
print(h.shape)
(50, 100)
In [34]:
plt.pcolormesh(g)
Out[34]:

Reduction Operations

In [35]:
# sum
g.sum()
Out[35]:
-3083.0383878071548
In [36]:
# mean
g.mean()
Out[36]:
-0.61660767756143098
In [37]:
# std
g.std()
Out[37]:
1.6402280119141424
In [38]:
# apply on just one axis
g_ymean = g.mean(axis=0)
g_xmean = g.mean(axis=1)
In [39]:
plt.plot(x, g_ymean)
Out[39]:
[]
In [40]:
plt.plot(g_xmean, y)
Out[40]:
[]

Fancy Plotting

Enough lessons, let's have some fun.

In [41]:
fig = plt.figure(figsize=(12,8))
ax1 = plt.subplot2grid((6,6),(0,1),colspan=5)
ax2 = plt.subplot2grid((6,6),(1,0),rowspan=5)
In [42]:
fig = plt.figure(figsize=(10,6))
ax1 = plt.subplot2grid((6,6),(0,1),colspan=5)
ax2 = plt.subplot2grid((6,6),(1,0),rowspan=5)
ax3 = plt.subplot2grid((6,6),(1,1),rowspan=5, colspan=5)

ax1.plot(x, g_ymean)
ax2.plot(g_xmean, y)
ax3.pcolormesh(x, y,  g)

ax1.set_xlim([x.min(), x.max()])
ax3.set_xlim([x.min(), x.max()])
ax2.set_ylim([y.min(), y.max()])
ax3.set_ylim([y.min(), y.max()])

plt.tight_layout()

Real Data

ARGO float profile from North Atlantic

In [43]:
# download with curl
!curl -O http://www.ldeo.columbia.edu/~rpa/argo_float_4901412.npz
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  140k  100  140k    0     0   402k      0 --:--:-- --:--:-- --:--:--  405k
In [4]:
# load numpy file and examine keys
data = np.load('argo_float_4901412.npz')
data.keys()
Out[4]:
['S', 'T', 'levels', 'lon', 'date', 'P', 'lat']
In [5]:
# access some data
T = data['T']
In [6]:
# there are "nans", missing data, which screw up our routines
T.min()
Out[6]:
nan
In [7]:
ar_w_mask = np.ma.masked_array([1, 2, 3, 4, 5],
                        mask=[True, True, False, False, False])
ar_w_mask
Out[7]:
masked_array(data = [-- -- 3 4 5],
             mask = [ True  True False False False],
       fill_value = 999999)
In [8]:
ar_w_mask.mean()
Out[8]:
4.0
In [10]:
T_ma = np.ma.masked_invalid(T)
T_ma.mean()
Out[10]:
11.104955983298781

Masked Arrays

This is how we deal with missing data in numpy

In [47]:
# create masked array
T = np.ma.masked_invalid(data['T'])
type(T)
Out[47]:
numpy.ma.core.MaskedArray
In [48]:
# max and min
T.max(), T.min()
Out[48]:
(24.364999771118164, 3.5320000648498535)
In [49]:
# load other data
S = np.ma.masked_invalid(data['S'])
P = np.ma.masked_invalid(data['P'])
In [50]:
# scatter plot
plt.scatter(S, T, c=P)
plt.grid()
plt.colorbar()
Out[50]: