Figure 2b (Rong, Frey et al. 2024)

Figure 2b (Rong, Frey et al. 2024)

Figure 2b Caption: “Growth curve of the xylitol strain with (dark blue) or without (light blue) the CRISPRi switch induced. (…) Error bars and shaded areas indicate mean ± s.d. (n = 4 biological replicates (…) OD values (…) were measured using a Jenway 6705 UV/Vis spectrophotometer (…)”

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

For portability, the data is included directly as a dictionary. See the commented-out code for how the data was obtained from the CSV file figure_2b.csv.

# How the dictionary below was produced from the CSV file (kept for reference):
# fname = "figure_2b.csv"
# data = pd.read_csv(fname).round(4)
# data.to_dict(orient="list")

# Wide-format table: one row per timepoint, one column per replicate.
data = pd.DataFrame(
    {
        "timepoint (h)": [2, 12, 12, 16, 16, 30],
        "Uninduced_rep1": [1.8465, 1.8465, 1.8654, 1.8654, 1.7892, 1.7892],
        "Uninduced_rep2": [1.8806, 1.8806, 1.5969, 1.5969, 1.8764, 1.8764],
        "Uninduced_rep3": [2.0512, 2.0512, 1.5597, 1.5597, 1.627, 1.627],
        "Uninduced_rep4": [1.9217, 1.9217, 1.9355, 1.9355, 1.65, 1.65],
        "Induced_rep1": [2.1165, 2.1165, 2.1316, 2.1316, 2.3346, 2.3346],
        "Induced_rep2": [2.0487, 2.0487, 3.0061, 3.0061, 2.2944, 2.2944],
        "Induced_rep3": [2.0778, 2.0778, 2.1711, 2.1711, 2.4402, 2.4402],
        "Induced_rep4": [2.0859, 2.0859, 2.0865, 2.0865, 2.483, 2.483],
    }
)
# Use the timepoint as the row index and label the column axis so that the
# later reshape to long format yields a "replicate" column.
data = data.set_index("timepoint (h)")
data = data.rename_axis("replicate", axis="columns")
data
replicate Uninduced_rep1 Uninduced_rep2 Uninduced_rep3 Uninduced_rep4 Induced_rep1 Induced_rep2 Induced_rep3 Induced_rep4
timepoint (h)
2 1.8465 1.8806 2.0512 1.9217 2.1165 2.0487 2.0778 2.0859
12 1.8465 1.8806 2.0512 1.9217 2.1165 2.0487 2.0778 2.0859
12 1.8654 1.5969 1.5597 1.9355 2.1316 3.0061 2.1711 2.0865
16 1.8654 1.5969 1.5597 1.9355 2.1316 3.0061 2.1711 2.0865
16 1.7892 1.8764 1.6270 1.6500 2.3346 2.2944 2.4402 2.4830
30 1.7892 1.8764 1.6270 1.6500 2.3346 2.2944 2.4402 2.4830
# Reshape from wide to long format: one row per (timepoint, replicate) pair
# with the measured value in a "yield" column.
data_long = data.stack().rename("yield").reset_index()
# The condition is the part of the replicate label before the first underscore,
# e.g. "Uninduced_rep1" -> "Uninduced".
data_long["condition"] = data_long["replicate"].str.split("_").str.get(0)
data_long
timepoint (h) replicate yield condition
0 2 Uninduced_rep1 1.8465 Uninduced
1 2 Uninduced_rep2 1.8806 Uninduced
2 2 Uninduced_rep3 2.0512 Uninduced
3 2 Uninduced_rep4 1.9217 Uninduced
4 2 Induced_rep1 2.1165 Induced
5 2 Induced_rep2 2.0487 Induced
6 2 Induced_rep3 2.0778 Induced
7 2 Induced_rep4 2.0859 Induced
8 12 Uninduced_rep1 1.8465 Uninduced
9 12 Uninduced_rep2 1.8806 Uninduced
10 12 Uninduced_rep3 2.0512 Uninduced
11 12 Uninduced_rep4 1.9217 Uninduced
12 12 Induced_rep1 2.1165 Induced
13 12 Induced_rep2 2.0487 Induced
14 12 Induced_rep3 2.0778 Induced
15 12 Induced_rep4 2.0859 Induced
16 12 Uninduced_rep1 1.8654 Uninduced
17 12 Uninduced_rep2 1.5969 Uninduced
18 12 Uninduced_rep3 1.5597 Uninduced
19 12 Uninduced_rep4 1.9355 Uninduced
20 12 Induced_rep1 2.1316 Induced
21 12 Induced_rep2 3.0061 Induced
22 12 Induced_rep3 2.1711 Induced
23 12 Induced_rep4 2.0865 Induced
24 16 Uninduced_rep1 1.8654 Uninduced
25 16 Uninduced_rep2 1.5969 Uninduced
26 16 Uninduced_rep3 1.5597 Uninduced
27 16 Uninduced_rep4 1.9355 Uninduced
28 16 Induced_rep1 2.1316 Induced
29 16 Induced_rep2 3.0061 Induced
30 16 Induced_rep3 2.1711 Induced
31 16 Induced_rep4 2.0865 Induced
32 16 Uninduced_rep1 1.7892 Uninduced
33 16 Uninduced_rep2 1.8764 Uninduced
34 16 Uninduced_rep3 1.6270 Uninduced
35 16 Uninduced_rep4 1.6500 Uninduced
36 16 Induced_rep1 2.3346 Induced
37 16 Induced_rep2 2.2944 Induced
38 16 Induced_rep3 2.4402 Induced
39 16 Induced_rep4 2.4830 Induced
40 30 Uninduced_rep1 1.7892 Uninduced
41 30 Uninduced_rep2 1.8764 Uninduced
42 30 Uninduced_rep3 1.6270 Uninduced
43 30 Uninduced_rep4 1.6500 Uninduced
44 30 Induced_rep1 2.3346 Induced
45 30 Induced_rep2 2.2944 Induced
46 30 Induced_rep3 2.4402 Induced
47 30 Induced_rep4 2.4830 Induced

Create a bar plot with error bars for the measurements across the timepoints. Use either matplotlib or seaborn for a static plot.

# Grouped bar plot: one bar per condition at each timepoint. No estimator or
# error-bar arguments are passed, so seaborn's defaults are used for both.
fig, ax = plt.subplots(figsize=(10, 6))
plot_columns = {"x": "timepoint (h)", "y": "yield", "hue": "condition"}
sns.barplot(data=data_long, ax=ax, **plot_columns)
<Axes: xlabel='timepoint (h)', ylabel='yield'>
../../_images/90437980d63264c8f941a36a62709435172aeabe760202b2d71727d434c82190.png

Add individual data points

Show code of one solution
# One possible solution: grouped bars showing mean ± s.d. per condition and
# timepoint, with every individual replicate measurement drawn on top.
# Uses the current seaborn parameter names (`errorbar`, `err_kws`), which
# require seaborn >= 0.13.
fig, ax = plt.subplots(figsize=(10, 6))

# Fix the hue order explicitly so that the palette matches the figure caption:
# induced = dark blue, uninduced = light blue. Without it the order follows
# first appearance in the data ("Uninduced" first) and the colours are swapped.
hue_order = ["Induced", "Uninduced"]

sns.barplot(
    data=data_long,
    x="timepoint (h)",
    y="yield",
    hue="condition",
    hue_order=hue_order,
    errorbar="sd",  # replaces the deprecated `ci="sd"`
    capsize=0.1,
    err_kws={"linewidth": 2},  # replaces the deprecated `errwidth=2`
    edgecolor="black",
    linewidth=1.5,
    palette=["#4A90E2", "#A8D5F7"],
    ax=ax,
)

# Overlay the individual data points. `dodge=True` with the same hue order
# places each point over the bar of its own condition; `legend=False` avoids
# duplicating the legend entries already created by the bar plot.
sns.stripplot(
    data=data_long,
    x="timepoint (h)",
    y="yield",
    hue="condition",
    hue_order=hue_order,
    dodge=True,
    palette=["white", "white"],
    edgecolor="black",
    linewidth=1.5,
    size=8,
    legend=False,
    ax=ax,
)

Claude Sonnet 4.5

Prompt: “Can you generate some example data and code to generate the following plot using Python?” Context: adding a screenshot of Figure 2b from Rong, Frey et al. 2024

Hide code cell source

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# Example data (hard-coded illustrative values, three replicates per group).
# Each label names one time interval in hours and becomes an x-axis tick label.
time_intervals = ['2-12', '12-16', '16-30']

# Individual replicate measurements: one inner list per time interval,
# in the same order as `time_intervals`.
induced_data = [
    [2.05, 2.15, 2.1],      # 2-12h
    [2.3, 2.4, 2.5],        # 12-16h
    [2.5, 2.55, 2.6]        # 16-30h
]

uninduced_data = [
    [1.9, 1.95, 2.0],       # 2-12h
    [1.7, 1.75, 1.8],       # 12-16h
    [1.68, 1.73, 1.78]      # 16-30h
]

# Calculate per-interval means and standard errors of the mean
# (sample standard deviation with ddof=1, divided by sqrt(n)).
# NOTE: the error bars drawn below are therefore SEM, whereas the Figure 2b
# caption quoted at the top of this page reports mean ± s.d.
induced_means = [np.mean(d) for d in induced_data]
uninduced_means = [np.mean(d) for d in uninduced_data]
induced_sem = [np.std(d, ddof=1) / np.sqrt(len(d)) for d in induced_data]
uninduced_sem = [np.std(d, ddof=1) / np.sqrt(len(d)) for d in uninduced_data]

# Perform an independent two-sample t-test (SciPy defaults) per interval,
# comparing induced against uninduced replicates.
p_values = [stats.ttest_ind(ind, unind).pvalue 
            for ind, unind in zip(induced_data, uninduced_data)]

# Set up the plot
fig, ax = plt.subplots(figsize=(10, 6))

# Bar positions: one integer slot per interval; the two bars of a group are
# shifted by half a bar width to the left (induced) and right (uninduced).
x = np.arange(len(time_intervals))
width = 0.35

# Create bars (bar height = mean, error bar = SEM)
bars1 = ax.bar(x - width/2, induced_means, width, 
               label='Induced', color='#4A90E2', 
               edgecolor='black', linewidth=1.5,
               yerr=induced_sem, capsize=5, error_kw={'linewidth': 2})

bars2 = ax.bar(x + width/2, uninduced_means, width,
               label='Uninduced', color='#A8D5F7',
               edgecolor='black', linewidth=1.5,
               yerr=uninduced_sem, capsize=5, error_kw={'linewidth': 2})

# Add individual data points on top of the bars (zorder=3).
# The x positions are jittered with normally distributed noise (s.d. 0.04)
# around each bar centre. No random seed is set in this script, so the exact
# horizontal positions of the points differ from run to run.
for i, (ind_points, unind_points) in enumerate(zip(induced_data, uninduced_data)):
    # Induced points
    x_induced = np.random.normal(i - width/2, 0.04, size=len(ind_points))
    ax.scatter(x_induced, ind_points, color='white', 
               edgecolor='black', s=80, linewidth=1.5, zorder=3)
    
    # Uninduced points
    x_uninduced = np.random.normal(i + width/2, 0.04, size=len(unind_points))
    ax.scatter(x_uninduced, unind_points, color='white',
               edgecolor='black', s=80, linewidth=1.5, zorder=3)

# Add significance asterisks for intervals with p < 0.05: a horizontal bracket
# 0.15 above the taller of the two error-bar tops, and a '*' 0.2 above it.
for i, p_val in enumerate(p_values):
    if p_val < 0.05:
        y_max = max(induced_means[i] + induced_sem[i], 
                    uninduced_means[i] + uninduced_sem[i])
        ax.plot([i - width/2, i + width/2], [y_max + 0.15, y_max + 0.15], 
                'k-', linewidth=1.5)
        ax.text(i, y_max + 0.2, '*', ha='center', va='bottom', 
                fontsize=20, fontweight='bold')

# Customize the plot: axis labels, interval tick labels, fixed y-range 0-3
ax.set_ylabel('Yield (mmol mol$^{-1}$)', fontsize=14, fontweight='bold')
ax.set_xlabel('Time interval (h)', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(time_intervals, fontsize=12)
ax.set_ylim(0, 3)
ax.set_yticks([0, 1, 2, 3])
ax.tick_params(axis='both', which='major', labelsize=11)

# Add legend (entries come from the `label=` arguments of the two bar calls)
ax.legend(loc='upper left', fontsize=12, frameon=False)

# Add horizontal grid lines and keep them behind the bars
ax.grid(axis='y', linestyle='-', alpha=0.3, zorder=0)
ax.set_axisbelow(True)

# Remove top and right spines; thicken the remaining left and bottom ones
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_linewidth(1.5)
ax.spines['bottom'].set_linewidth(1.5)

plt.tight_layout()
plt.show()

# Print statistical results: mean ± SEM per group and the t-test p-value.
# NOTE: p-values are formatted with four decimals, so very small values are
# printed as 0.0000 rather than in scientific notation.
print("\nStatistical Analysis:")
print("-" * 50)
for i, interval in enumerate(time_intervals):
    print(f"\n{interval} hours:")
    print(f"  Induced: {induced_means[i]:.3f} ± {induced_sem[i]:.3f}")
    print(f"  Uninduced: {uninduced_means[i]:.3f} ± {uninduced_sem[i]:.3f}")
    print(f"  p-value: {p_values[i]:.4f} {'*' if p_values[i] < 0.05 else ''}")
../../_images/c02b853fdef015ebec75912d6ae263efe03b6919584a608d05efe36fcb4feecb.png
Statistical Analysis:
--------------------------------------------------

2-12 hours:
  Induced: 2.100 ± 0.029
  Uninduced: 1.950 ± 0.029
  p-value: 0.0213 *

12-16 hours:
  Induced: 2.400 ± 0.058
  Uninduced: 1.750 ± 0.029
  p-value: 0.0005 *

16-30 hours:
  Induced: 2.550 ± 0.029
  Uninduced: 1.730 ± 0.029
  p-value: 0.0000 *