@@ -96,18 +96,12 @@ def read_pdb_to_dataframe(
96
96
:rtype: pd.DataFrame
97
97
"""
98
98
if pdb_code is None and path is None and uniprot_id is None :
99
- raise NameError (
100
- "One of pdb_code, path or uniprot_id must be specified!"
101
- )
99
+ raise NameError ("One of pdb_code, path or uniprot_id must be specified!" )
102
100
103
101
if path is not None :
104
102
if isinstance (path , Path ):
105
103
path = os .fsdecode (path )
106
- if (
107
- path .endswith (".pdb" )
108
- or path .endswith (".pdb.gz" )
109
- or path .endswith (".ent" )
110
- ):
104
+ if path .endswith (".pdb" ) or path .endswith (".pdb.gz" ) or path .endswith (".ent" ):
111
105
atomic_df = PandasPdb ().read_pdb (path )
112
106
elif path .endswith (".mmtf" ) or path .endswith (".mmtf.gz" ):
113
107
atomic_df = PandasMmtf ().read_mmtf (path )
@@ -116,9 +110,7 @@ def read_pdb_to_dataframe(
116
110
f"File { path } must be either .pdb(.gz), .mmtf(.gz) or .ent, not { path .split ('.' )[- 1 ]} "
117
111
)
118
112
elif uniprot_id is not None :
119
- atomic_df = PandasPdb ().fetch_pdb (
120
- uniprot_id = uniprot_id , source = "alphafold2-v3"
121
- )
113
+ atomic_df = PandasPdb ().fetch_pdb (uniprot_id = uniprot_id , source = "alphafold2-v3" )
122
114
else :
123
115
atomic_df = PandasPdb ().fetch_pdb (pdb_code )
124
116
@@ -172,11 +164,7 @@ def label_node_id(
172
164
df ["node_id" ] = df ["node_id" ] + ":" + df ["atom_name" ]
173
165
elif granularity in {"rna_atom" , "rna_centroid" }:
174
166
df ["node_id" ] = (
175
- df ["node_id" ]
176
- + ":"
177
- + df ["atom_number" ].apply (str )
178
- + ":"
179
- + df ["atom_name" ]
167
+ df ["node_id" ] + ":" + df ["atom_number" ].apply (str ) + ":" + df ["atom_name" ]
180
168
)
181
169
return df
182
170
@@ -189,9 +177,7 @@ def deprotonate_structure(df: pd.DataFrame) -> pd.DataFrame:
189
177
:returns: Atomic dataframe with all ``element_symbol == "H" or "D" or "T"`` removed.
190
178
:rtype: pd.DataFrame
191
179
"""
192
- log .debug (
193
- "Deprotonating protein. This removes H atoms from the pdb_df dataframe"
194
- )
180
+ log .debug ("Deprotonating protein. This removes H atoms from the pdb_df dataframe" )
195
181
return filter_dataframe (
196
182
df ,
197
183
by_column = "element_symbol" ,
@@ -225,9 +211,7 @@ def convert_structure_to_centroids(df: pd.DataFrame) -> pd.DataFrame:
225
211
return df
226
212
227
213
228
- def subset_structure_to_atom_type (
229
- df : pd .DataFrame , granularity : str
230
- ) -> pd .DataFrame :
214
+ def subset_structure_to_atom_type (df : pd .DataFrame , granularity : str ) -> pd .DataFrame :
231
215
"""
232
216
Return a subset of atomic dataframe that contains only certain atom names.
233
217
@@ -241,9 +225,7 @@ def subset_structure_to_atom_type(
241
225
)
242
226
243
227
244
- def remove_alt_locs (
245
- df : pd .DataFrame , keep : str = "max_occupancy"
246
- ) -> pd .DataFrame :
228
+ def remove_alt_locs (df : pd .DataFrame , keep : str = "max_occupancy" ) -> pd .DataFrame :
247
229
"""
248
230
This function removes alternatively located atoms from PDB DataFrames
249
231
(see https://proteopedia.org/wiki/index.php/Alternate_locations). Among the
@@ -277,7 +259,7 @@ def remove_alt_locs(
277
259
# Unsort
278
260
if keep in ["max_occupancy" , "min_occupancy" ]:
279
261
df = df .sort_index ()
280
-
262
+ df = df . reset_index ( drop = True )
281
263
return df
282
264
283
265
@@ -307,9 +289,7 @@ def remove_insertions(
307
289
)
308
290
309
291
310
- def filter_hetatms (
311
- df : pd .DataFrame , keep_hets : List [str ]
312
- ) -> List [pd .DataFrame ]:
292
+ def filter_hetatms (df : pd .DataFrame , keep_hets : List [str ]) -> List [pd .DataFrame ]:
313
293
"""Return hetatms of interest.
314
294
315
295
:param df: Protein Structure dataframe to filter hetatoms from.
@@ -454,9 +434,7 @@ def sort_dataframe(df: pd.DataFrame) -> pd.DataFrame:
454
434
:return: Sorted protein dataframe.
455
435
:rtype: pd.DataFrame
456
436
"""
457
- return df .sort_values (
458
- by = ["chain_id" , "residue_number" , "atom_number" , "insertion" ]
459
- )
437
+ return df .sort_values (by = ["chain_id" , "residue_number" , "atom_number" , "insertion" ])
460
438
461
439
462
440
def select_chains (
@@ -558,8 +536,7 @@ def initialise_graph_with_metadata(
558
536
elif granularity == "atom" :
559
537
sequence = (
560
538
protein_df .loc [
561
- (protein_df ["chain_id" ] == c )
562
- & (protein_df ["atom_name" ] == "CA" )
539
+ (protein_df ["chain_id" ] == c ) & (protein_df ["atom_name" ] == "CA" )
563
540
]["residue_name" ]
564
541
.apply (three_to_one_with_mods )
565
542
.str .cat ()
@@ -610,13 +587,9 @@ def add_nodes_to_graph(
610
587
# Set intrinsic node attributes
611
588
nx .set_node_attributes (G , dict (zip (nodes , chain_id )), "chain_id" )
612
589
nx .set_node_attributes (G , dict (zip (nodes , residue_name )), "residue_name" )
613
- nx .set_node_attributes (
614
- G , dict (zip (nodes , residue_number )), "residue_number"
615
- )
590
+ nx .set_node_attributes (G , dict (zip (nodes , residue_number )), "residue_number" )
616
591
nx .set_node_attributes (G , dict (zip (nodes , atom_type )), "atom_type" )
617
- nx .set_node_attributes (
618
- G , dict (zip (nodes , element_symbol )), "element_symbol"
619
- )
592
+ nx .set_node_attributes (G , dict (zip (nodes , element_symbol )), "element_symbol" )
620
593
nx .set_node_attributes (G , dict (zip (nodes , coords )), "coords" )
621
594
nx .set_node_attributes (G , dict (zip (nodes , b_factor )), "b_factor" )
622
595
@@ -642,9 +615,7 @@ def calculate_centroid_positions(
642
615
:rtype: pd.DataFrame
643
616
"""
644
617
centroids = (
645
- atoms .groupby (
646
- ["residue_number" , "chain_id" , "residue_name" , "insertion" ]
647
- )
618
+ atoms .groupby (["residue_number" , "chain_id" , "residue_name" , "insertion" ])
648
619
.mean (numeric_only = True )[["x_coord" , "y_coord" , "z_coord" ]]
649
620
.reset_index ()
650
621
)
@@ -902,13 +873,9 @@ def _mp_graph_constructor(
902
873
func = partial (construct_graph , config = config )
903
874
try :
904
875
if source == "pdb_code" :
905
- return func (
906
- pdb_code = args [0 ], chain_selection = args [1 ], model_index = args [2 ]
907
- )
876
+ return func (pdb_code = args [0 ], chain_selection = args [1 ], model_index = args [2 ])
908
877
elif source == "path" :
909
- return func (
910
- path = args [0 ], chain_selection = args [1 ], model_index = args [2 ]
911
- )
878
+ return func (path = args [0 ], chain_selection = args [1 ], model_index = args [2 ])
912
879
elif source == "uniprot_id" :
913
880
return func (
914
881
uniprot_id = args [0 ],
@@ -1004,9 +971,7 @@ def construct_graphs_mp(
1004
971
)
1005
972
if out_path is not None :
1006
973
[
1007
- nx .write_gpickle (
1008
- g , str (f"{ out_path } /" + f"{ g .graph ['name' ]} .pickle" )
1009
- )
974
+ nx .write_gpickle (g , str (f"{ out_path } /" + f"{ g .graph ['name' ]} .pickle" ))
1010
975
for g in graphs
1011
976
]
1012
977
@@ -1070,15 +1035,11 @@ def compute_chain_graph(
1070
1035
1071
1036
# Add edges
1072
1037
for u , v , d in g .edges (data = True ):
1073
- h .add_edge (
1074
- g .nodes [u ]["chain_id" ], g .nodes [v ]["chain_id" ], kind = d ["kind" ]
1075
- )
1038
+ h .add_edge (g .nodes [u ]["chain_id" ], g .nodes [v ]["chain_id" ], kind = d ["kind" ])
1076
1039
# Remove self-loops if necessary. Checks for equality between nodes in a
1077
1040
# given edge.
1078
1041
if remove_self_loops :
1079
- edges_to_remove : List [Tuple [str ]] = [
1080
- (u , v ) for u , v in h .edges () if u == v
1081
- ]
1042
+ edges_to_remove : List [Tuple [str ]] = [(u , v ) for u , v in h .edges () if u == v ]
1082
1043
h .remove_edges_from (edges_to_remove )
1083
1044
1084
1045
# Compute a weighted graph if required.
@@ -1181,16 +1142,10 @@ def compute_secondary_structure_graph(
1181
1142
ss_list = ss_list [~ ss_list .str .contains ("-" )]
1182
1143
# Subset to only allowable SS elements if necessary
1183
1144
if allowable_ss_elements :
1184
- ss_list = ss_list [
1185
- ss_list .str .contains ("|" .join (allowable_ss_elements ))
1186
- ]
1145
+ ss_list = ss_list [ss_list .str .contains ("|" .join (allowable_ss_elements ))]
1187
1146
1188
- constituent_residues : Dict [str , List [str ]] = ss_list .index .groupby (
1189
- ss_list .values
1190
- )
1191
- constituent_residues = {
1192
- k : list (v ) for k , v in constituent_residues .items ()
1193
- }
1147
+ constituent_residues : Dict [str , List [str ]] = ss_list .index .groupby (ss_list .values )
1148
+ constituent_residues = {k : list (v ) for k , v in constituent_residues .items ()}
1194
1149
residue_counts : Dict [str , int ] = ss_list .groupby (ss_list ).count ().to_dict ()
1195
1150
1196
1151
# Add Nodes from secondary structure list
@@ -1209,9 +1164,7 @@ def compute_secondary_structure_graph(
1209
1164
# Iterate over edges in source graph and add SS-SS edges to new graph.
1210
1165
for u , v , d in g .edges (data = True ):
1211
1166
try :
1212
- h .add_edge (
1213
- ss_list [u ], ss_list [v ], kind = d ["kind" ], source = f"{ u } _{ v } "
1214
- )
1167
+ h .add_edge (ss_list [u ], ss_list [v ], kind = d ["kind" ], source = f"{ u } _{ v } " )
1215
1168
except KeyError as e :
1216
1169
log .debug (
1217
1170
f"Edge { u } -{ v } not added to secondary structure graph. \
@@ -1221,9 +1174,7 @@ def compute_secondary_structure_graph(
1221
1174
# Remove self-loops if necessary.
1222
1175
# Checks for equality between nodes in a given edge.
1223
1176
if remove_self_loops :
1224
- edges_to_remove : List [Tuple [str ]] = [
1225
- (u , v ) for u , v in h .edges () if u == v
1226
- ]
1177
+ edges_to_remove : List [Tuple [str ]] = [(u , v ) for u , v in h .edges () if u == v ]
1227
1178
h .remove_edges_from (edges_to_remove )
1228
1179
1229
1180
# Create weighted graph from h
0 commit comments