Figure 2b (Rong, Frey et al. 2024)#

Figure 2b Caption: “Growth curve of the xylitol strain with (dark blue) or without (light blue) the CRISPRi switch induced. (…) Error bars and shaded areas indicate mean ± s.d. (n = 4 biological replicates (…) OD values (…) were measured using a Jenway 6705 UV/Vis spectrophotometer (…)”

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

For portability, the data is included directly as a dictionary. See the commented-out code for how the data was obtained from the CSV file figure_2b.csv.

# How the literal values below were exported from the CSV file (kept for
# reference; not executed here):
# fname = "figure_2b.csv"
# data = pd.read_csv(fname).round(4)
# data.to_dict(orient="list")

# One list per column; every list has one entry per row of the table.
# Replicate columns are named "<condition>_rep<k>".
measurements = {
    "timepoint (h)": [2, 12, 12, 16, 16, 30],
    "Uninduced_rep1": [1.8465, 1.8465, 1.8654, 1.8654, 1.7892, 1.7892],
    "Uninduced_rep2": [1.8806, 1.8806, 1.5969, 1.5969, 1.8764, 1.8764],
    "Uninduced_rep3": [2.0512, 2.0512, 1.5597, 1.5597, 1.627, 1.627],
    "Uninduced_rep4": [1.9217, 1.9217, 1.9355, 1.9355, 1.65, 1.65],
    "Induced_rep1": [2.1165, 2.1165, 2.1316, 2.1316, 2.3346, 2.3346],
    "Induced_rep2": [2.0487, 2.0487, 3.0061, 3.0061, 2.2944, 2.2944],
    "Induced_rep3": [2.0778, 2.0778, 2.1711, 2.1711, 2.4402, 2.4402],
    "Induced_rep4": [2.0859, 2.0859, 2.0865, 2.0865, 2.483, 2.483],
}

# Wide table: rows indexed by timepoint, one column per replicate. The column
# axis is named "replicate" so that name carries through a later reshape.
data = pd.DataFrame(measurements)
data = data.set_index("timepoint (h)")
data = data.rename_axis(columns="replicate")
data

replicate	Uninduced_rep1	Uninduced_rep2	Uninduced_rep3	Uninduced_rep4	Induced_rep1	Induced_rep2	Induced_rep3	Induced_rep4
timepoint (h)
2	1.8465	1.8806	2.0512	1.9217	2.1165	2.0487	2.0778	2.0859
12	1.8465	1.8806	2.0512	1.9217	2.1165	2.0487	2.0778	2.0859
12	1.8654	1.5969	1.5597	1.9355	2.1316	3.0061	2.1711	2.0865
16	1.8654	1.5969	1.5597	1.9355	2.1316	3.0061	2.1711	2.0865
16	1.7892	1.8764	1.6270	1.6500	2.3346	2.2944	2.4402	2.4830
30	1.7892	1.8764	1.6270	1.6500	2.3346	2.2944	2.4402	2.4830

# Reshape wide -> long: one row per (timepoint, replicate) measurement, with
# the value stored in a "yield" column. The condition label is the part of the
# replicate name before the underscore ("Induced_rep1" -> "Induced").
data_long = (
    data.stack()
    .rename("yield")
    .reset_index()
    .assign(
        condition=lambda frame: frame["replicate"].str.split("_", n=1).str.get(0)
    )
)
data_long

	timepoint (h)	replicate	yield	condition
0	2	Uninduced_rep1	1.8465	Uninduced
1	2	Uninduced_rep2	1.8806	Uninduced
2	2	Uninduced_rep3	2.0512	Uninduced
3	2	Uninduced_rep4	1.9217	Uninduced
4	2	Induced_rep1	2.1165	Induced
5	2	Induced_rep2	2.0487	Induced
6	2	Induced_rep3	2.0778	Induced
7	2	Induced_rep4	2.0859	Induced
8	12	Uninduced_rep1	1.8465	Uninduced
9	12	Uninduced_rep2	1.8806	Uninduced
10	12	Uninduced_rep3	2.0512	Uninduced
11	12	Uninduced_rep4	1.9217	Uninduced
12	12	Induced_rep1	2.1165	Induced
13	12	Induced_rep2	2.0487	Induced
14	12	Induced_rep3	2.0778	Induced
15	12	Induced_rep4	2.0859	Induced
16	12	Uninduced_rep1	1.8654	Uninduced
17	12	Uninduced_rep2	1.5969	Uninduced
18	12	Uninduced_rep3	1.5597	Uninduced
19	12	Uninduced_rep4	1.9355	Uninduced
20	12	Induced_rep1	2.1316	Induced
21	12	Induced_rep2	3.0061	Induced
22	12	Induced_rep3	2.1711	Induced
23	12	Induced_rep4	2.0865	Induced
24	16	Uninduced_rep1	1.8654	Uninduced
25	16	Uninduced_rep2	1.5969	Uninduced
26	16	Uninduced_rep3	1.5597	Uninduced
27	16	Uninduced_rep4	1.9355	Uninduced
28	16	Induced_rep1	2.1316	Induced
29	16	Induced_rep2	3.0061	Induced
30	16	Induced_rep3	2.1711	Induced
31	16	Induced_rep4	2.0865	Induced
32	16	Uninduced_rep1	1.7892	Uninduced
33	16	Uninduced_rep2	1.8764	Uninduced
34	16	Uninduced_rep3	1.6270	Uninduced
35	16	Uninduced_rep4	1.6500	Uninduced
36	16	Induced_rep1	2.3346	Induced
37	16	Induced_rep2	2.2944	Induced
38	16	Induced_rep3	2.4402	Induced
39	16	Induced_rep4	2.4830	Induced
40	30	Uninduced_rep1	1.7892	Uninduced
41	30	Uninduced_rep2	1.8764	Uninduced
42	30	Uninduced_rep3	1.6270	Uninduced
43	30	Uninduced_rep4	1.6500	Uninduced
44	30	Induced_rep1	2.3346	Induced
45	30	Induced_rep2	2.2944	Induced
46	30	Induced_rep3	2.4402	Induced
47	30	Induced_rep4	2.4830	Induced

Create a bar plot with error bars for the measurement across the timepoints. Either use matplotlib or seaborn for a static plot.

# Grouped bar plot: one group per timepoint, one bar per condition. Bar height
# and error bar come from seaborn's default aggregation of the replicate rows.
column_roles = {"x": "timepoint (h)", "y": "yield", "hue": "condition"}

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))
sns.barplot(data=data_long, ax=ax, **column_roles)

<Axes: xlabel='timepoint (h)', ylabel='yield'>

../../_images/90437980d63264c8f941a36a62709435172aeabe760202b2d71727d434c82190.png

Add individual data points

Show code of one solution

# Colours are keyed by condition so they do not depend on the order in which
# the conditions appear in `data_long`: dark blue = induced, light blue =
# uninduced, as stated in the figure caption.
condition_colors = {"Induced": "#4A90E2", "Uninduced": "#A8D5F7"}

fig, ax = plt.subplots(figsize=(10, 6))

# Bars show the mean of the replicates; error bars show +/- one standard
# deviation. `errorbar=` / `err_kws=` replace the deprecated `ci=` /
# `errwidth=` parameters.
sns.barplot(
    data=data_long,
    x="timepoint (h)",
    y="yield",
    hue="condition",
    errorbar="sd",
    capsize=0.1,
    err_kws={"linewidth": 2},
    edgecolor="black",
    linewidth=1.5,
    palette=condition_colors,
    ax=ax,
)

# Overlay the individual replicate measurements on top of the bars.
# `dodge=True` places the points over the bar of their own condition;
# `jitter=False` keeps the figure identical between runs; `legend=False`
# avoids duplicating the legend entries already created by the bar plot.
sns.stripplot(
    data=data_long,
    x="timepoint (h)",
    y="yield",
    hue="condition",
    dodge=True,
    jitter=False,
    palette={name: "white" for name in condition_colors},
    edgecolor="black",
    linewidth=1.5,
    size=8,
    legend=False,
    ax=ax,
)

Claude Sonnet 4.5#

Prompt: “Can you generate some example data and code to generate the following plot using Python?” Context: a screenshot of Figure 2b from Rong, Frey et al. 2024 was attached to the prompt.

Show code cell source

Hide code cell source

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# ---------------------------------------------------------------------------
# Synthetic example data: three replicate values per condition and interval.
# ---------------------------------------------------------------------------
time_intervals = ["2-12", "12-16", "16-30"]

# One inner list per entry of `time_intervals`, in the same order.
induced_data = [
    [2.05, 2.15, 2.1],
    [2.3, 2.4, 2.5],
    [2.5, 2.55, 2.6],
]
uninduced_data = [
    [1.9, 1.95, 2.0],
    [1.7, 1.75, 1.8],
    [1.68, 1.73, 1.78],
]


def _sem(values):
    """Return the standard error of the mean: sample s.d. (ddof=1) / sqrt(n)."""
    return np.std(values, ddof=1) / np.sqrt(len(values))


# Per-interval summary statistics for each condition.
induced_means = list(map(np.mean, induced_data))
uninduced_means = list(map(np.mean, uninduced_data))
induced_sem = list(map(_sem, induced_data))
uninduced_sem = list(map(_sem, uninduced_data))

# Independent two-sample t-test (scipy defaults) of induced vs. uninduced,
# one test per interval.
p_values = [
    stats.ttest_ind(with_switch, without_switch).pvalue
    for with_switch, without_switch in zip(induced_data, uninduced_data)
]

# ---------------------------------------------------------------------------
# Figure
# ---------------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(time_intervals))  # one group centre per interval
width = 0.35
half_width = width / 2  # offset of each bar centre from its group centre

# Induced bars sit left of the group centre, uninduced bars right of it.
# Error bars show the standard error of the mean.
bars1, bars2 = [
    ax.bar(
        x + shift,
        heights,
        width,
        label=label,
        color=face_color,
        edgecolor="black",
        linewidth=1.5,
        yerr=errors,
        capsize=5,
        error_kw={"linewidth": 2},
    )
    for shift, heights, errors, label, face_color in (
        (-half_width, induced_means, induced_sem, "Induced", "#4A90E2"),
        (half_width, uninduced_means, uninduced_sem, "Uninduced", "#A8D5F7"),
    )
]

# Individual replicate points, horizontally jittered around the centre of
# their bar. No random seed is set here, so the exact horizontal positions
# differ from run to run.
for group_idx, replicate_sets in enumerate(zip(induced_data, uninduced_data)):
    bar_centres = (group_idx - half_width, group_idx + half_width)
    for centre, points in zip(bar_centres, replicate_sets):
        jittered_x = np.random.normal(centre, 0.04, size=len(points))
        ax.scatter(
            jittered_x,
            points,
            color="white",
            edgecolor="black",
            s=80,
            linewidth=1.5,
            zorder=3,
        )

# Significance bracket and asterisk above every interval with p < 0.05.
for group_idx, p_val in enumerate(p_values):
    if not p_val < 0.05:
        continue
    # Top of the taller error bar in this group.
    y_max = max(
        induced_means[group_idx] + induced_sem[group_idx],
        uninduced_means[group_idx] + uninduced_sem[group_idx],
    )
    bracket_y = y_max + 0.15
    ax.plot(
        [group_idx - half_width, group_idx + half_width],
        [bracket_y, bracket_y],
        "k-",
        linewidth=1.5,
    )
    ax.text(
        group_idx,
        y_max + 0.2,
        "*",
        ha="center",
        va="bottom",
        fontsize=20,
        fontweight="bold",
    )

# Axis labels, ticks and limits.
ax.set_ylabel("Yield (mmol mol$^{-1}$)", fontsize=14, fontweight="bold")
ax.set_xlabel("Time interval (h)", fontsize=14, fontweight="bold")
ax.set_xticks(x)
ax.set_xticklabels(time_intervals, fontsize=12)
ax.set_ylim(0, 3)
ax.set_yticks([0, 1, 2, 3])
ax.tick_params(axis="both", which="major", labelsize=11)

# Legend without a frame.
ax.legend(loc="upper left", fontsize=12, frameon=False)

# Light horizontal grid, drawn underneath the bars.
ax.grid(axis="y", linestyle="-", alpha=0.3, zorder=0)
ax.set_axisbelow(True)

# Open frame: hide the top/right spines and thicken the remaining two.
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
for side in ("left", "bottom"):
    ax.spines[side].set_linewidth(1.5)

plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# Text summary of the statistics shown in the figure.
# ---------------------------------------------------------------------------
print("\nStatistical Analysis:")
print("-" * 50)
summary_rows = zip(
    time_intervals,
    induced_means,
    induced_sem,
    uninduced_means,
    uninduced_sem,
    p_values,
)
for interval, ind_mean, ind_err, unind_mean, unind_err, p_val in summary_rows:
    marker = "*" if p_val < 0.05 else ""
    print(f"\n{interval} hours:")
    print(f"  Induced: {ind_mean:.3f} ± {ind_err:.3f}")
    print(f"  Uninduced: {unind_mean:.3f} ± {unind_err:.3f}")
    print(f"  p-value: {p_val:.4f} {marker}")

../../_images/c02b853fdef015ebec75912d6ae263efe03b6919584a608d05efe36fcb4feecb.png

Statistical Analysis:
--------------------------------------------------

2-12 hours:
  Induced: 2.100 ± 0.029
  Uninduced: 1.950 ± 0.029
  p-value: 0.0213 *

12-16 hours:
  Induced: 2.400 ± 0.058
  Uninduced: 1.750 ± 0.029
  p-value: 0.0005 *

16-30 hours:
  Induced: 2.550 ± 0.029
  Uninduced: 1.730 ± 0.029
  p-value: 0.0000 *

Figure 2b (Rong, Frey et al. 2024)

Contents

Figure 2b (Rong, Frey et al. 2024)#

Claude Sonnet 4.5#