Here is Python code defining a command “modelcif pae” that plots pairwise residue scores found in the ma_qa_metric_local_pairwise table found in some ModelCIF files. These scores are often predicted aligned error values from AlphaFold. The command writes a file in AlhpaFold JSON PAE format and opens it with the alphafold pae command. Displaying pairwise scores was requested by Gerardo Tauriello on the ChimeraX mailing list.
Here’s an ChimeraX example using a Pombe histone H3/H4 and DPB3 complex ModelCIF file 003-Spombe_H3-H4_tetramer_DPB3.cif.
open ~/Downloads/003-Spombe_H3-H4_tetramer_DPB3.cif
Open the modelcif_pae.py Python code to define the modelcif pae command
open ~/Downloads/modelcif_pae.py
Then create the score plot with
modelcif pae #1
Here is the modelcif_pae.py code:
# Read pairwise residue scores from a ModelCIF file and plot them in ChimeraX.
def modelcif_pae(session, structure, json_output_path = None, metric_id = None, default_score = 100):
matrix = read_pairwise_scores(structure, metric_id = metric_id, default_score = default_score)
if json_output_path is None:
import tempfile
temp = tempfile.NamedTemporaryFile(prefix = 'modelcif_pae_', suffix = '.json')
json_output_path = temp.name
write_json_pae_file(json_output_path, matrix)
# Open PAE plot
from chimerax.core.commands import run, quote_if_necessary
open_cmd = f'alphafold pae #{structure.id_string} file {quote_if_necessary(json_output_path)}'
run(session, open_cmd)
def read_pairwise_scores(structure, metric_id = None, default_score = 100):
if not hasattr(structure, 'filename'):
from chimerax.core.errors import UserError
raise UserError(f'Structure {structure} has no associated file')
values = read_ma_qa_metric_local_pairwise_table(structure.filename)
if values is None:
from chimerax.core.errors import UserError
raise UserError(f'Structure file {structure.filename} contains no pairwise residue scores (i.e. no table "ma_qa_metric_local_pairwise")')
# Use only the scores with the given metric id.
if metric_id is None and len(values) > 0:
metric_id = values[0][5]
values = [v for v in values if v[5] == metric_id]
if len(values) == 0:
from chimerax.core.errors import UserError
raise UserError(f'Structure file {structure.filename} has no scores for metric id "{metric_id}"')
matrix_index = {(r.chain_id,r.number):ri for ri,r in enumerate(structure.residues)}
nr = structure.num_residues
from numpy import empty, float32
matrix = empty((nr,nr), float32)
matrix[:] = default_score
for model_id, chain_id_1, res_num_1, chain_id_2, res_num_2, metric_id, metric_value in values:
res_num_1, res_num_2, metric_value = int(res_num_1), int(res_num_2), float(metric_value)
r1 = matrix_index[(chain_id_1, res_num_1)]
r2 = matrix_index[(chain_id_2, res_num_2)]
matrix[r1,r2] = metric_value
return matrix
def read_ma_qa_metric_local_pairwise_table(path):
from chimerax.mmcif import get_cif_tables
table_names = ['ma_qa_metric_local_pairwise']
try:
tables = get_cif_tables(path, table_names)
except TypeError:
# Bug in get_cif_tables() results in TypeError if table not present. Ticket #16054
return None
if len(tables) == 0:
return None
ma_qa_metric_local_pairwise = tables[0]
field_names = ['model_id',
'label_asym_id_1', 'label_seq_id_1',
'label_asym_id_2', 'label_seq_id_2',
'metric_id', 'metric_value']
values = ma_qa_metric_local_pairwise.fields(field_names)
return values
def write_json_pae_file(json_output_path, matrix):
# Write matrix in JSON AlphaFold PAE format
# {"pae": [[17.14, 18.75, 17.91, ...], [5.32, 8.23, ...], ... ]}
n = matrix.shape[0]
dists = ', '.join(('[ ' + ', '.join('%.2f' % matrix[i,j] for j in range(n)) + ' ]')
for i in range(n))
with open(json_output_path, 'w') as file:
file.write('{"pae": [')
file.write(dists)
file.write(']}')
def register_command(logger):
from chimerax.core.commands import CmdDesc, register, StringArg, FloatArg, SaveFileNameArg
from chimerax.atomic import StructureArg
desc = CmdDesc(
required = [('structure', StructureArg)],
keyword = [('metric_id', StringArg),
('default_score', FloatArg),
('json_output_path', SaveFileNameArg)],
synopsis = 'Plot ModelCIF pairwise residue scores'
)
register('modelcif pae', desc, modelcif_pae, logger=logger)
register_command(session.logger)
Tom Goddard, October 1, 2024