-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRog_density_plot_ButForCrowded.py
More file actions
154 lines (123 loc) · 5.5 KB
/
Rog_density_plot_ButForCrowded.py
File metadata and controls
154 lines (123 loc) · 5.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import yticks
"""
This script analyzes Radius of Gyration (RoG) data for the RbsB protein
from molecular dynamics simulations under different force fields.
It performs the following operations:
1. Scans a specific directory structure to find RoG data files ('RbsB_gyrate.xvg')
for AMBER, CHARMM (AA), and SIRAH force fields across multiple simulation runs ('Take_1' to 'Take_5').
2. Parses the '.xvg' files to extract the total Radius of Gyration value, which is
the second column in the data section.
3. Aggregates the data from all runs for each force field.
4. Generates a single, publication-quality density plot ('RoG_RbsB_comparison.png')
that overlays the RoG distributions for the different force fields, allowing for
direct comparison.
5. Includes reference RoG values from X-ray structures as vertical dashed lines on the plot.
"""
# --- Configuration ---
# Base directory where the force field folders are located.
BASE_DIR = "/ceph/users/akv16273/34_Elcock_13abundant_inEColi/21_Analysis_Datastructure/"
# Map folder names to the force field names for plotting.
FORCE_FIELD_CONFIG = {
"20_AMBER": {"name": "AMBER"},
"12_AA": {"name": "CHARMM"},
"13_SIRAH": {"name": "SIRAH"}
}
# Number of simulation runs to look for in each force field directory.
TAKE_COUNT = 5
# --- Core Functions ---
def load_rog_data(filename):
"""
Loads total Radius of Gyration data from a GROMACS .xvg file.
Based on the file format, the total Rg is the second column (index 1).
This function reads that column, skipping all header and comment lines.
Args:
filename (str): The path to the .xvg file.
Returns:
numpy.ndarray: A 1D array of RoG values, or None if the file is empty or an error occurs.
"""
try:
# Load the data, skipping headers (@, #) and using only the second column (index 1).
data = np.loadtxt(
filename,
comments=['@', '#'],
usecols=(1,)
)
return data
except (ValueError, IndexError) as e:
print(f"Error loading data from {filename}: {e}")
return None
def plot_combined_rog_densities(data_collection):
"""
Generates and saves a single plot with overlaid RoG density histograms.
Args:
data_collection (dict): A dictionary where keys are force field names
and values are arrays of RoG data.
"""
fig, ax = plt.subplots(figsize=(6, 4), dpi=600)
colors = {'AMBER': 'olive', 'CHARMM': 'cyan', 'SIRAH': 'brown'}
for force_field, data in data_collection.items():
if data is not None and len(data) > 0:
ax.hist(data, bins=50, density=True, alpha=0.6, color=colors.get(force_field, 'gray'), label=force_field)
# Add vertical dashed lines at reference X-ray structure RoG values
ax.axvline(2.0891, color='purple', linestyle='--', label='1ba2_A X-ray')
ax.axvline(2.0480, color='green', linestyle='--', label='1ba2_B X-ray')
ax.axvline(2.0214, color='yellow', linestyle='--', label='1urp X-ray')
ax.axvline(1.9217, color='red', linestyle='--', label='2DRI X-ray')
# --- Formatting ---
ax.set_xlabel('Radius of Gyration (nm)', fontsize=20)
ax.set_ylabel('Density', fontsize=20)
# ax.set_title('RbsB Radius of Gyration Comparison', fontsize=16)
ax.set_xlim(1.85, 2.20)
ax.set_ylim(0, 55.0)
ax.set_xticks(np.arange(1.85, 2.20, 0.1))
ax.tick_params(axis='both', which='major', labelsize=18)
ax.grid(True, linestyle='-', alpha=0.8)
# ax.legend()
# Save the figure
output_filename = "RoG_RbsB_comparison_noLegend.png"
plt.savefig(output_filename, bbox_inches='tight')
plt.close()
print(f"\nComparative plot saved as {output_filename}")
def main():
"""
Main function to drive the data loading, processing, and plotting.
"""
# This dictionary will hold the aggregated RoG data for each force field.
# e.g., {'AMBER': [2.1, 2.2, ...], 'CHARMM': [1.9, 2.0, ...]}
aggregated_data = {}
print("Starting analysis of RbsB Radius of Gyration...")
# Iterate through each configured force field
for ff_folder, config in FORCE_FIELD_CONFIG.items():
ff_name = config['name']
ff_data = []
print(f"\nProcessing Force Field: {ff_name}")
# Handle the special subdirectory for the '12_AA' force field
current_path = os.path.join(BASE_DIR, ff_folder)
if ff_folder == "12_AA":
current_path = os.path.join(current_path, "1_monomer")
# Loop through each 'Take_*' directory
for i in range(1, TAKE_COUNT + 1):
take_folder = f"Take_{i}"
filepath = os.path.join(current_path, take_folder, "RbsB_gyrate.xvg")
if os.path.exists(filepath):
print(f" Loading data from: {filepath}")
data = load_rog_data(filepath)
if data is not None:
ff_data.extend(data)
else:
print(f" Warning: File not found - {filepath}")
if ff_data:
aggregated_data[ff_name] = np.array(ff_data)
print(f" -> Successfully aggregated {len(ff_data)} data points for {ff_name}.")
else:
print(f" -> No data found for {ff_name}.")
# Generate the final combined plot
if aggregated_data:
plot_combined_rog_densities(aggregated_data)
else:
print("\nNo data was aggregated. Cannot generate plot.")
if __name__ == "__main__":
main()