Import Convention
import numpy as np
Creating Arrays
# From Python sequences
a = np.array([1, 2, 3]) # 1-D, dtype inferred (int64)
b = np.array([[1, 2], [3, 4]], dtype=float) # 2-D, explicit dtype
# Filled arrays
np.zeros((3, 4)) # 3x4 array of 0.0
np.ones((2, 3), dtype=int) # 2x3 array of 1
np.full((2, 2), 7.5) # 2x2 filled with 7.5
np.eye(3) # 3x3 identity matrix
np.empty((4, 4)) # uninitialized — fast allocation, garbage values
# Ranges & sequences
np.arange(0, 10, 2) # [0 2 4 6 8] — like range() but returns array
np.arange(0.0, 1.0, 0.25) # works with floats too, but rounding can change the length for non-integer steps — prefer np.linspace
np.linspace(0, 1, 5) # [0. 0.25 0.5 0.75 1. ] — 5 evenly spaced
np.linspace(0, 1, 5, endpoint=False) # exclude right endpoint
# Special
np.zeros_like(a) # same shape & dtype as a, filled with 0
np.ones_like(b) # same shape & dtype as b, filled with 1
dtype and Type Casting
a = np.array([1, 2, 3])
a.dtype # dtype('int64')
# Common dtypes
np.float32, np.float64 # single / double precision
np.int8, np.int32, np.int64
np.bool_, np.complex128
# Cast
a.astype(np.float32) # new array, original unchanged
a.astype(str) # ['1' '2' '3']
# dtype memory footprint
np.dtype(np.float32).itemsize # 4 bytes per element
Shape, Size, and Dimensions
a = np.zeros((3, 4, 5))
a.ndim # 3 — number of axes
a.shape # (3, 4, 5)
a.size # 60 — total elements
a.nbytes # 60 * 8 = 480 bytes (float64)
# Shape shortcuts
len(a) # 3 — length of axis 0 only
a.shape[0] # 3 — size of axis 0; a.shape[1] is 4 and a.shape[2] is 5
Reshape, Ravel, Flatten, Squeeze, Expand
a = np.arange(12)
a.reshape(3, 4) # view if possible — shape (3,4)
a.reshape(3, -1) # -1 lets NumPy infer: (3, 4)
a.reshape(-1, 1) # column vector (12, 1)
a.reshape(1, -1) # row vector (1, 12)
# Flatten vs ravel
a.reshape(3, 4).flatten() # always a COPY
a.reshape(3, 4).ravel() # COPY only if needed (prefer this)
# Add / remove size-1 axes
x = np.array([1, 2, 3]) # shape (3,)
x[:, np.newaxis] # shape (3, 1) — column vector
x[np.newaxis, :] # shape (1, 3) — row vector
np.expand_dims(x, axis=0) # same as above
np.squeeze(np.zeros((1, 3, 1))) # shape (3,) — remove all size-1 axes
Indexing and Slicing
a = np.array([[10, 20, 30],
[40, 50, 60],
[70, 80, 90]])
# Basic
a[0] # first row → [10 20 30]
a[0, 2] # 30
a[-1, -1] # 90
# Slicing [start:stop:step]
a[:, 0] # column 0 → [10 40 70]
a[1:, 1:] # bottom-right 2x2
a[::2, ::2] # every other row and col → [[10 30],[70 90]]
# Multidimensional
a[0, :] # row 0
a[:, 1] # column 1
# Step / reverse
a[::-1] # rows reversed
a[:, ::-1] # columns reversed
Boolean Indexing
a = np.array([5, 12, 3, 9, 17])
mask = a > 8 # [False True False True True]
a[mask] # [12 9 17] — always a COPY
a[a % 2 == 0] # [12]
# Compound conditions — use & | ~ (not and / or)
a[(a > 4) & (a < 15)] # [ 5 12 9]
# Assign through boolean mask
a[a < 0] = 0 # clip negatives in-place
# 2-D boolean indexing
m = np.arange(9).reshape(3, 3)
m[m > 4] # flat array [5 6 7 8]
# np.where to get indices
np.where(a > 8) # (array([1, 3, 4]),)
rows, cols = np.where(m > 4)
Fancy (Integer) Indexing
a = np.array([10, 20, 30, 40, 50])
# Select by index list — ALWAYS a copy
a[[0, 2, 4]] # [10 30 50]
a[[1, 1, 3]] # [20 20 40] (repeats allowed)
# 2-D fancy indexing
m = np.arange(12).reshape(4, 3)
rows = [0, 2, 3]
cols = [1, 0, 2]
m[rows, cols] # m[0,1], m[2,0], m[3,2] → [1 6 11]
# Combine with slicing
m[[0, 2], :] # rows 0 and 2, all columns
m[:, [0, 2]] # all rows, columns 0 and 2
Broadcasting Rules
Broadcasting lets NumPy work on arrays of different shapes without making copies.
Rules — compared right-to-left on shapes:
- If shapes differ in number of dimensions, prepend 1s to the smaller shape.
- Axes of size 1 are stretched to match the other.
- If neither size is 1 and sizes differ — error.
# Shape (3,) + scalar → trivial
np.array([1, 2, 3]) + 10 # [11 12 13]
# (3,1) + (1,4) → (3,4)
col = np.array([[0], [10], [20]]) # shape (3, 1)
row = np.array([[1, 2, 3, 4]]) # shape (1, 4)
col + row
# [[ 1 2 3 4]
# [11 12 13 14]
# [21 22 23 24]]
# Practical: subtract mean from each column of a (100, 5) matrix
X = np.random.randn(100, 5)
X_centered = X - X.mean(axis=0) # X.mean(axis=0) shape (5,) → broadcasts over rows
# (3, 4) + (4,) → (3, 4) — (4,) treated as (1,4)
np.ones((3, 4)) + np.arange(4)
# ERROR: (3,) + (4,) — incompatible, neither is 1
# np.array([1,2,3]) + np.array([1,2,3,4]) → ValueError
Element-wise Operations and Ufuncs
a = np.array([1.0, 4.0, 9.0, 16.0])
b = np.array([2.0, 2.0, 3.0, 4.0])
# Arithmetic — all element-wise
a + b; a - b; a * b; a / b; a ** 2; a // b; a % b
# NumPy ufuncs (universal functions) — C-speed loops
np.sqrt(a) # [1. 2. 3. 4.]
np.log(a) # natural log
np.log2(a); np.log10(a)
np.exp(a)
np.abs(np.array([-1, -2, 3])) # [1 2 3]
np.sin(a); np.cos(a); np.tan(a)
np.ceil(a); np.floor(a); np.round(a, decimals=2)
# Binary ufuncs
np.maximum(a, b) # element-wise max
np.minimum(a, b)
np.add(a, b) # same as a + b but callable
np.multiply.outer(np.array([1,2,3]), np.array([1,2,3])) # outer product
# In-place (no intermediate allocation)
a += 1 # modifies a directly
np.sqrt(a, out=a) # write result into a — zero extra memory
Aggregations and Axis Intuition
Axis 0 collapses rows (operates down columns). Axis 1 collapses columns (operates across rows).
m = np.array([[1, 2, 3],
[4, 5, 6]]) # shape (2, 3)
m.sum() # 21 — all elements
m.sum(axis=0) # [5 7 9] — sum each column (collapse rows)
m.sum(axis=1) # [ 6 15] — sum each row (collapse cols)
m.mean(axis=0) # [2.5 3.5 4.5]
m.std(axis=1) # row-wise std
m.min(); m.max()
m.min(axis=0) # column-wise minimum
m.argmin(axis=0) # INDEX of column-wise minimum
m.argmax(axis=1) # index of row-wise maximum
m.cumsum(axis=1) # cumulative sum along rows
m.prod(axis=0) # product along columns
# keepdims — preserve axes for broadcasting
means = m.mean(axis=1, keepdims=True) # shape (2,1), not (2,)
m - means # subtract each row's mean from that row
# NaN-aware versions (ignore NaN values)
np.nanmean(m); np.nansum(m); np.nanstd(m)
Matrix Operations
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# Matrix multiplication — three equivalent ways
A @ B # preferred (PEP 465, Python 3.5+)
np.matmul(A, B)
np.dot(A, B) # also works for 2-D; use @ for clarity
# Dot product of 1-D arrays (inner product)
np.dot(np.array([1, 2, 3]), np.array([4, 5, 6])) # 32
# Outer product
np.outer(np.array([1, 2]), np.array([3, 4, 5])) # shape (2, 3)
# Transpose
A.T # view — no copy
A.transpose() # same
np.transpose(A, axes=(1, 0)) # explicit axes for higher-D
# Linear algebra (np.linalg)
np.linalg.det(A) # determinant: -2.0
np.linalg.inv(A) # inverse
np.linalg.norm(A) # Frobenius norm by default
np.linalg.norm(A, ord=2) # spectral norm
# Solve Ax = b — prefer over inv(A) @ b (more stable)
b = np.array([5, 6])
x = np.linalg.solve(A, b) # x s.t. A @ x == b
# Eigenvalues and eigenvectors
vals, vecs = np.linalg.eig(A)
# SVD
U, s, Vt = np.linalg.svd(A)
# Matrix power
np.linalg.matrix_power(A, 3) # A @ A @ A
Concatenation, Stacking, and Splitting
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
# Concatenate along an existing axis
np.concatenate([a, b], axis=0) # stack vertically shape (4,2)
np.concatenate([a, b], axis=1) # stack horizontally shape (2,4)
# Convenience wrappers
np.vstack([a, b]) # axis=0 (adds rows)
np.hstack([a, b]) # axis=1 (adds columns)
np.dstack([a, b]) # axis=2 (depth)
# stack — creates a NEW axis
np.stack([a, b], axis=0) # shape (2, 2, 2)
np.stack([a, b], axis=1) # shape (2, 2, 2) — different interleaving
# Splitting
x = np.arange(12).reshape(3, 4)
np.split(x, 3, axis=0) # 3 equal pieces along rows
np.array_split(x, 5, axis=1) # 5 pieces (unequal OK)
np.hsplit(x, 2) # split horizontally into 2
np.vsplit(x, 3) # split vertically into 3
Useful Array Manipulation Functions
a = np.array([3, 1, 4, 1, 5, 9, 2, 6])
# Sort
np.sort(a) # returns sorted copy
a.sort() # in-place — mutates a; the results shown below assume the original, unsorted a
np.sort(a)[::-1] # descending
# argsort — indices that would sort the array
idx = np.argsort(a) # [1 3 6 0 2 4 7 5] (indices into a)
a[idx] # sorted values via fancy indexing
# 2-D sort along axis
m = np.array([[3,1],[4,1]])
np.sort(m, axis=1) # sort each row
# argmax / argmin — flat index by default
np.argmax(a) # 5 (index of 9)
np.argmin(a) # 1 (index of first 1)
# Unique
np.unique(a) # [1 2 3 4 5 6 9]
vals, counts = np.unique(a, return_counts=True)
# Where — conditional select
np.where(a > 4, a, 0) # keep values > 4, else 0
np.where(a % 2 == 0, 'even', 'odd')
# Clip
np.clip(a, 2, 6) # values below 2 → 2, above 6 → 6, else unchanged
# Flip
np.flip(a) # reverse entire array
np.flip(m, axis=0) # flip rows
# Tile & repeat
np.tile(a, 3) # repeat the whole array 3 times
np.repeat(a, 2) # repeat each element 2 times
Views vs Copies — A Critical Gotcha
NumPy slices return VIEWS, not copies. Modifying the slice modifies the original.
a = np.array([1, 2, 3, 4, 5])
# SLICE = VIEW
s = a[1:4]
s[0] = 99
a # [1 99 3 4 5] — original changed!
# Fancy/boolean indexing = COPY (always safe to modify)
f = a[[1, 2, 3]]
f[0] = 0
a # original unchanged
# Check ownership
s.base is a # True — s is a view of a
f.base is None # True — f owns its data (copy)
# Force a copy from a slice
c = a[1:4].copy()
c[0] = 0
a # unchanged
# reshape is usually a view
r = np.arange(6).reshape(2, 3)
r.base # the original arange array
r.flags['OWNDATA'] # False
# Ravel may or may not copy — flatten always copies
arr = np.array([[1,2],[3,4]])
arr.ravel().base is arr # True (view)
arr.flatten().base is None # True (copy)
# Transpose is always a view
arr.T.base is arr # True
Random Number Generation
rng = np.random.default_rng(seed=42) # modern API — prefer over legacy np.random.*
# Floats in [0, 1)
rng.random((3, 4)) # shape (3, 4)
# Normal distribution
rng.standard_normal((100, 5)) # mean=0, std=1
rng.normal(loc=5, scale=2, size=(100,))
# Integers
rng.integers(0, 10, size=20) # [0, 10) exclusive
# Uniform
rng.uniform(low=-1, high=1, size=(50,))
# Shuffle and permutation
arr = np.arange(10)
rng.shuffle(arr) # in-place
rng.permutation(10) # returns new shuffled array
# Choice (with or without replacement)
rng.choice(arr, size=5, replace=False)
# Reproducibility — generators built from the same seed
rng1 = np.random.default_rng(0)
rng2 = np.random.default_rng(0)
# rng1 and rng2 produce identical sequences
# Legacy API (still common in older code)
np.random.seed(42)
np.random.randn(3, 4) # normal
np.random.rand(3, 4) # uniform [0,1)
np.random.randint(0, 10, (20,))
Vectorization vs Python Loops
NumPy operations are implemented in C and process entire arrays without Python per-element overhead.
import time
n = 1_000_000
a = np.random.randn(n)
b = np.random.randn(n)
# Python loop — slow
def loop_add(a, b):
    """Add two sequences element by element with an explicit Python loop.

    This is the slow, per-element baseline that the vectorized ``a + b``
    is timed against.

    Args:
        a: Indexable sequence; its length determines how many sums are made.
        b: Indexable sequence with at least ``len(a)`` elements.

    Returns:
        A list whose i-th entry is ``a[i] + b[i]``.

    Raises:
        IndexError: If ``b`` has fewer elements than ``a``.
    """
    # Index-based on purpose: this is the per-element pattern being benchmarked.
    return [a[i] + b[i] for i in range(len(a))]
# NumPy vectorized — fast
result = a + b
# Comparison
t0 = time.perf_counter(); loop_add(a, b); print(time.perf_counter()-t0) # ~0.5 s
t0 = time.perf_counter(); a + b; print(time.perf_counter()-t0) # ~0.002 s
# Vectorize a custom Python function (syntactic sugar, NOT faster than a loop)
f = np.vectorize(lambda x: x**2 + 2*x + 1) # use only when you must
# The right way — express with ufuncs/array ops
def fast_f(x):
    """Evaluate the polynomial ``x**2 + 2*x + 1``.

    Only arithmetic operators are used, so the same expression works for a
    scalar or for any object that implements ``**``, ``*`` and ``+``
    element-wise (such as a NumPy array).

    Args:
        x: A number or an array supporting element-wise arithmetic.

    Returns:
        The value of ``x**2 + 2*x + 1``, of the same kind as ``x``.
    """
    return x**2 + 2*x + 1  # operates on whole array at once
# Dot product: avoid explicit loops
np.dot(a, b) # C-speed; not sum(a[i]*b[i] for i in range(n))
# Boolean mask instead of loop filter
big = a[a > 2.0] # no Python loop
# Apply along axis with np.apply_along_axis — still Python loop underneath
# prefer axis-aware ufuncs (sum, max, etc.) when possible
Aggregations for ML Data Shapes
Standard ML convention: X has shape (n_samples, n_features), y has shape (n_samples,).
n, p = 1000, 20
X = np.random.randn(n, p) # (1000, 20) feature matrix
y = np.random.randint(0, 2, n) # (1000,) binary labels
# Feature statistics
mu = X.mean(axis=0) # (20,) per-feature mean
sigma = X.std(axis=0) # (20,) per-feature std
X_std = (X - mu) / sigma # broadcasting: (1000,20) - (20,) → (1000,20)
# Sample norms
norms = np.linalg.norm(X, axis=1) # (1000,) — L2 norm per sample
X_normalized = X / norms[:, np.newaxis] # (1000,20) / (1000,1)
# Train / val split (manual)
idx = np.random.default_rng(0).permutation(n)
split = int(0.8 * n)
X_train, X_val = X[idx[:split]], X[idx[split:]]
y_train, y_val = y[idx[:split]], y[idx[split:]]
# One-hot encoding
n_classes = 3
labels = np.array([0, 2, 1, 0])
ohe = np.eye(n_classes)[labels] # shape (4, 3)
# Covariance matrix
C = np.cov(X.T) # (20, 20) — np.cov treats each ROW as a variable, hence X.T (same as np.cov(X, rowvar=False))
# Batch matrix multiply (3-D arrays)
W = np.random.randn(32, 20, 10) # 32 weight matrices of shape 20x10
b = np.random.randn(32, n, 20) # 32 batches of n samples
# b @ W → (32, n, 10) — batched matmul
# Flatten images: (n, H, W, C) → (n, H*W*C)
images = np.random.randint(0, 256, (100, 28, 28, 1), dtype=np.uint8)
flat = images.reshape(100, -1) # (100, 784)
Saving and Loading Arrays
a = np.arange(100).reshape(10, 10)
# Binary format — fast, lossless, preserves dtype
np.save('array.npy', a)
a2 = np.load('array.npy')
# Multiple arrays in one file
np.savez('arrays.npz', X=a, y=np.arange(10))
data = np.load('arrays.npz')
data['X']; data['y']
# Compressed (larger arrays)
np.savez_compressed('arrays_c.npz', X=a)
# Text (CSV-like) — slow, loses precision for floats
np.savetxt('array.csv', a, delimiter=',', fmt='%.6f')
a3 = np.loadtxt('array.csv', delimiter=',') # comes back as float64 — the original integer dtype is not preserved
Quick Reference: Shape Transformations
# (n,) → (n,1) add column axis
x[:, np.newaxis] # or x.reshape(-1, 1)
# (n,) → (1,n) add row axis
x[np.newaxis, :] # or x.reshape(1, -1)
# (n,1) → (n,) remove trailing axis
x.squeeze() # or x[:, 0] or x.reshape(-1)
# (n,m) → (m,n) transpose
X.T
# (a,b,c) → (a,c,b) swap last two axes
np.swapaxes(arr, 1, 2) # or arr.transpose(0, 2, 1)
# Flatten everything
arr.ravel() # prefer (view when possible)
# Stack 1-D arrays into columns: (n,) + (n,) → (n,2)
np.column_stack([a, b])
# Stack 1-D arrays into rows: (n,) + (n,) → (2,n)
np.row_stack([a, b]) # alias of np.vstack — deprecated since NumPy 2.0; prefer np.vstack([a, b])