Matplotlib#

  • A comprehensive library for creating static, animated, and interactive visualizations in Python.

  • Built on NumPy arrays and designed to work with the broader SciPy stack.

Inspiration for plots and overviews

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Use font type 42 (TrueType) for both PDF and PostScript exports.
mpl.rcParams.update({"pdf.fonttype": 42, "ps.fonttype": 42})

Basic Line Plot#

From Getting Started.

# One figure holding exactly one Axes; draw a line through four (x, y) points
# on it and display the result.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
plt.show()
_images/327ebdfd933f100ec4a94bcd082f8089e85eda993713be34248ac657a3e2ee3d.png

Figure, Axes and axis: see Anatomy of a matplotlib figure

# Line plot with grid lines switched on.
fig, ax = plt.subplots()  # Create a figure containing a single Axes.
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])  # Plot some data on the Axes.
# Pass True explicitly: a bare grid() call toggles the current state, so it
# would switch the grid off under a style that enables it by default.
ax.grid(True)
plt.show()
_images/7e077895267faf4138842ccdcfe289b9a516779cb0fcbe69cc5900eee21af982.png

Customize ticks

# Place major y ticks on integer values only (acts on the existing `ax`).
ax.yaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))
# Uncomment to do the same for the x axis:
# ax.xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))
# Last expression of the cell: evaluates to the figure so it is displayed again.
ax.get_figure()
_images/5b275ef39b4ba59b6fbba7adfd071cd9a8255eed940c72823b228b6349e08d8d.png
# Draw 10,000 normally distributed samples (mean mu, standard deviation sigma)
# and plot their histogram.
mu, sigma = 115, 15
x = mu + sigma * np.random.randn(10000)
fig, ax = plt.subplots(figsize=(5, 2.7), layout="constrained")
# the histogram of the data: 50 bins, bar heights normalized to a density
n, bins, patches = ax.hist(
    x,
    50,
    density=True,
    facecolor="C0",  # first color in color palette
    alpha=0.75,
)

ax.set_xlabel("Length [cm]")
ax.set_ylabel("Probability")
ax.set_title("Aardvark lengths\n (not really)")
# Build the annotation from mu and sigma so it cannot drift from the data.
ax.text(75, 0.025, rf"$\mu={mu},\ \sigma={sigma}$")
ax.axis([55, 175, 0, 0.03])  # [xmin, xmax, ymin, ymax]
ax.grid(True)
_images/f394b675d0018c88420c9230cdf9985c631a5dead5a755e2f7556e50cdb25179.png

Anatomy of a matplotlib figure#

Exercise: Comment out parts of the code above and see what happens to the plot.

First we generate the data (Source).

# Fix the seed so the random "circles" column is reproducible.
np.random.seed(19680801)

# Two cosine curves sampled at 100 points, plus one uniform random value per
# point drawn between the two curves.
X = np.linspace(0.5, 3.5, 100)
Y1 = 3 + np.cos(X)
Y2 = 1 + np.cos(1 + X / 0.75) / 2
Y3 = np.random.uniform(Y1, Y2, len(X))

# Collect everything in one table and preview the first rows.
data = pd.DataFrame({"X": X, "red_line": Y1, "blue_line": Y2, "circles": Y3})
data.head()
X red_line blue_line circles
0 0.500000 3.877583 0.952138 1.828697
1 0.530303 3.862654 0.932074 1.685963
2 0.560606 3.846933 0.912120 1.765329
3 0.590909 3.830435 0.892309 2.165265
4 0.621212 3.813174 0.872675 0.937997

Create the figure without the annotations. Ready to customize!

from matplotlib.ticker import AutoMinorLocator, MultipleLocator

# Square figure with a manually placed Axes ([left, bottom, width, height] in
# figure fractions); aspect=1 gives x and y the same scale.
fig = plt.figure(figsize=(7.4, 7.4))
ax = fig.add_axes([0.2, 0.17, 0.68, 0.7], aspect=1)

# Both axes: a major tick at every integer, each interval split into 4 minor steps.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(MultipleLocator(1.000))
    axis.set_minor_locator(AutoMinorLocator(4))
# Only the x axis labels its minor ticks, with two decimals.
ax.xaxis.set_minor_formatter("{x:.2f}")

ax.set(xlim=(0, 4), ylim=(0, 4))

# Major ticks are longer and carry larger labels than minor ticks.
ax.tick_params(which="major", width=1.0, length=10, labelsize=14)
ax.tick_params(which="minor", width=1.0, length=5, labelsize=10, labelcolor="0.25")

# Thin dashed grid with a negative zorder so it is drawn behind the data.
ax.grid(linestyle="--", linewidth=0.5, color=".25", zorder=-10)

# The two curves.
ax.plot(X, Y1, color="C0", linewidth=2.5, label="Blue signal", zorder=10)
ax.plot(X, Y2, color="C1", linewidth=2.5, label="Orange signal")
# Every third random sample as an unfilled square marker, no connecting line.
ax.plot(
    X[::3],
    Y3[::3],
    lw=0,
    ms=9,
    marker="s",
    mfc="none",
    mec="C4",  # color 5 in color palette
    mew=2.5,
)

ax.set_title("Anatomy of a figure", fontsize=20, va="bottom")
ax.set_xlabel("x Axis label", fontsize=14)
ax.set_ylabel("y Axis label", fontsize=14)
# Last expression of the cell: the legend lists the two labelled curves.
ax.legend(loc="upper right", fontsize=14)
<matplotlib.legend.Legend at 0x7fee185c5df0>
_images/7fdafcb349136a4f1ae1039c160bfd7b17d5fdb2e1143b366d4638e21147ccf9.png

Save the figure#

  • The file extension decides the output format (and the ‘backend’ used for the export).

# PNG export at 300 dots per inch.
fig.savefig(fname="anatomy_of_figure.png", dpi=300)
# PDF export of the same figure.
fig.savefig(fname="anatomy_of_figure.pdf")
# ## Proteomics data example
# - plotting a histogram via the pandas interface
import os
import pathlib

import pandas as pd

# Treat the presence of the COLAB_GPU environment variable as "running on Colab".
IN_COLAB = "COLAB_GPU" in os.environ

# On Colab read the CSV from GitHub; otherwise use the copy in the local
# data/proteins folder.
if IN_COLAB:
    fname = (
        "https://raw.githubusercontent.com/biosustain/dsp_workshop_dataviz_python"
        "/refs/heads/main/data/proteins/proteins.csv"
    )
else:
    fname = pathlib.Path("data") / "proteins" / "proteins.csv"

# The first CSV column becomes the row index.
df = pd.read_csv(fname, index_col=0)
df
A5A613 P00350 P00363 P00370 P00393 P00448 P00452 P00490 P00509 P00547 ... Q47319 Q47536 Q47622 Q47679 Q47710 Q57261 Q59385-2 Q59385 Q7DFV3 Q93K97
Reference
DMSO_rep1 27.180209 28.151576 30.247131 27.459171 26.823758 25.610416 NaN 27.864232 29.978578 26.064548 ... NaN NaN 25.342902 NaN 27.037851 28.410859 23.554913 27.640279 28.512794 27.223010
DMSO_rep2 NaN 27.926204 30.261665 26.873349 26.756617 24.901115 NaN 26.438754 29.047684 NaN ... NaN NaN NaN NaN 26.840857 27.940694 25.240354 27.243650 27.620780 25.291110
DMSO_rep3 NaN 27.653250 29.969625 26.599971 25.442346 25.053685 27.171761 26.381648 28.776632 NaN ... NaN NaN 24.576067 NaN 26.608837 27.070328 NaN 27.525020 27.678892 24.358694
DMSO_rep4 NaN 27.151643 29.470663 26.438623 25.798954 24.789968 NaN 26.819972 29.485008 25.524309 ... NaN NaN 25.945061 23.902241 27.163729 26.679649 22.524292 27.403753 27.255831 25.767196
Suf_rep1 NaN 27.441837 30.004725 27.399691 26.671118 25.563594 NaN 27.685173 29.295104 NaN ... NaN NaN 25.836449 NaN 26.819093 27.995432 NaN 27.498873 28.090220 25.956190
Suf_rep2 NaN 27.031610 30.085997 27.189188 26.885970 25.377559 27.363746 27.531440 29.283884 NaN ... NaN NaN NaN 24.162220 27.268473 27.055135 NaN 27.666957 27.525537 25.230565
Suf_rep3 NaN 27.814631 29.904057 27.139030 26.711192 25.318283 26.061913 27.545416 29.356666 26.264707 ... 25.46301 NaN NaN NaN 24.740745 27.313219 NaN 27.708407 27.814369 26.103059
Suf_rep4 NaN 27.587217 29.575194 27.223715 26.320866 25.360257 25.100872 27.704556 29.583906 26.426897 ... NaN 24.46752 24.757039 24.040325 27.071346 26.643479 NaN 27.847610 27.605449 26.177716

8 rows × 2269 columns

# First row of the table as a Series (one value per column), then display it.
x = df.iloc[0, :]
x
A5A613      27.180209
P00350      28.151576
P00363      30.247131
P00370      27.459171
P00393      26.823758
              ...    
Q57261      28.410859
Q59385-2    23.554913
Q59385      27.640279
Q7DFV3      28.512794
Q93K97      27.223010
Name: DMSO_rep1, Length: 2269, dtype: float64
# Histogram via the pandas interface; keep the returned Axes for further tweaks.
ax = x.hist()
_images/84ddc0d749cdd7a3915ae53cc776d850db3e5c080c373915d7f1ba8118e0729b.png
# Same data drawn with matplotlib directly: 30 bins, slightly transparent bars.
fig, ax = plt.subplots(nrows=1, ncols=1)
# try to change the color
n, bins, patches = ax.hist(x, color="C0", alpha=0.7, bins=30)
_images/f7f083b89709d7445f2ec416696c9c3149e2775a0c9413d526d90404e70dd9d2.png

Available styles#

Choose your preferred style with its defaults here

# Switch the style for every figure created from here on.
plt.style.use('ggplot')

ggplot seaborn-v0_8-bright seaborn-v0_8-white

# Apply the ggplot style only to the figure created inside this with-block.
with plt.style.context("ggplot"):
    fig, ax = plt.subplots(nrows=1, ncols=1)
    n, bins, patches = ax.hist(x, alpha=0.7, bins=30)
_images/bf9ec8b856de79f5aade06dd664fd6dd87f7010fa17bd595b2491593bb5cf112.png

Exercise#

Combine two plots:

# Two Axes side by side in one figure.
fig, axes = plt.subplots(1, 2, figsize=(7.4, 4))
# Flatten to a 1-D array so the Axes can be addressed by a single index,
# whatever the grid shape is.
axes = axes.flatten()
# Left panel: the histogram.
ax = axes[0]
n, bins, patches = ax.hist(x, color="C0", alpha=0.7, bins=30)
# Right panel: left empty for the exercise.
ax = axes[1]
# Add a second plot here
_images/f9de0ca5202d5535483f77928dc6efcd5aa1fa3cc9e039e667703f35a6a444ba.png