Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
atomtype.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file atomtype.c
5 
6  \version V2.0
7  \date 22.07.15
8  \brief Set atom types in a PDB file
9 
10  \copyright (c) Dr. Andrew C. R. Martin, UCL, 1999-2015
11  \author Dr. Andrew C. R. Martin
12  \par
13  Institute of Structural & Molecular Biology,
14  University College London,
15  Gower Street,
16  London.
17  WC1E 6BT.
18  \par
19  andrew@bioinf.org.uk
20  andrew.martin@ucl.ac.uk
21 
22 **************************************************************************
23 
24  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
25  according to the conditions laid out in the accompanying file
26  COPYING.DOC.
27 
28  The code may be modified as required, but any modifications must be
29  documented so that the person responsible can be identified.
30 
31  The code may not be sold commercially or included as part of a
32  commercial product except as described in the file COPYING.DOC.
33 
34 **************************************************************************
35 
36  Description:
37  ============
38  This code sets atomtypes for PDB atoms. See pdb.h/ATOMTYPE_xxxx
39  for the types
40 
41 **************************************************************************
42 
43  Usage:
44  ======
45 
46 **************************************************************************
47 
48  Revision History:
49  =================
50 - V1.0 23.03.99 Original By: ACRM
51 - V1.1 27.04.99 Fixes in SetPDBAtomTypesNSResidues() with O3* and P
52 - V1.2 18.06.99 Fixes in SetPDBAtomTypesNSResidues() BOOL return and
53  force parameter
54 - V1.3 21.06.99 Fixes in SetPDBAtomTypesNSResidues() Force now for
55  individual issues
56 - V1.4 06.09.99 Fixes in SetPDBAtomTypesNSResidues() Further simple
57  check for nucleotides.
58 - V2.0 22.07.15 All force code etc removed - now returns a list
59  of warnings
60 
61 *************************************************************************/
62 /* Includes
63 */
64 #include "general.h"
65 #include "pdb.h"
66 
67 /************************************************************************/
68 /* Defines and macros
69 */
70 #define MAXBUFF 240
71 
72 /************************************************************************/
73 /* Globals
74 */
75 
76 /************************************************************************/
77 /* Prototypes
78 */
79 static STRINGLIST *SetPDBAtomTypesNSResidues(PDB *pdb);
80 static void InitializePDBAtomTypes(PDB *pdb);
81 static void SetPDBAtomTypesModifiers(PDB *pdb);
82 static void SetPDBAtomTypesWaterAndNucleotides(PDB *pdb);
83 static void SetPDBAtomTypesMetals(PDB *pdb);
84 
85 /************************************************************************/
86 /*>STRINGLIST *blSetPDBAtomTypes(PDB *pdb)
87  ---------------------------------------
88 *//**
89  \param[in,out] *pdb PDB linked list
90  \return STRINGLIST of any warning messages
91  NULL if all OK
92 
93 - 21.07.15 Original By: ACRM
94 */
96 {
 /* NOTE(review): the signature line for this function is not present in
    this listing; only the body is visible here.

    The passes run in a fixed order. Each later pass tests atomtype values
    written by an earlier one: the initializer assigns types from the
    record type, the metal and modifier passes look for ATOMTYPE_HETATM,
    the water/nucleotide pass looks for ATOMTYPE_ATOM, and the final pass
    looks for ATOMTYPE_BOUNDHET / ATOMTYPE_ATOM.
 */
97  InitializePDBAtomTypes(pdb);
98  SetPDBAtomTypesMetals(pdb);
99  SetPDBAtomTypesWaterAndNucleotides(pdb);
100  SetPDBAtomTypesModifiers(pdb);
 /* The warning list built by the last pass is this function's result */
101  return(SetPDBAtomTypesNSResidues(pdb));
102 }
103 
104 
105 /************************************************************************/
106 /*>static void SetPDBAtomTypesMetals(PDB *pdb)
107  -------------------------------------------
108 *//**
109  \param[in,out] *pdb PDB linked list
110 
111  Identifies and sets metal atoms
112 
113 - 21.07.15 Original By: ACRM
114 */
115 static void SetPDBAtomTypesMetals(PDB *pdb)
116 {
117  PDB *p;
118  /* Walk every atom; only atoms currently typed ATOMTYPE_HETATM are tested */
119  for(p=pdb; p!=NULL; NEXT(p))
120  {
121  if(p->atomtype == ATOMTYPE_HETATM)
122  {
123  /* This is a list of non-metals in something like the order of
124  likelihood of occurrence. We don't include noble gases since
125  if these are found (unlikely!) they will be unbound and can
126  be thought of as metals.
127  */
 /* strcmp() is non-zero when the strings differ, so this chain is
    true only when the element matches none of the listed symbols.
    The comparison is exact and case-sensitive.
 */
128  if(strcmp(p->element,"C") &&
129  strcmp(p->element,"N") &&
130  strcmp(p->element,"O") &&
131  strcmp(p->element,"H") &&
132  strcmp(p->element,"S") &&
133  strcmp(p->element,"P") &&
134  strcmp(p->element,"CL") &&
135  strcmp(p->element,"BR") &&
136  strcmp(p->element,"I") &&
137  strcmp(p->element,"F") &&
138  strcmp(p->element,"B") &&
139  strcmp(p->element,"SI") &&
140  strcmp(p->element,"AS") &&
141  strcmp(p->element,"SE") &&
142  strcmp(p->element,"TE") &&
143  strcmp(p->element,"AT"))
144  {
 /* NOTE(review): the statement for this branch is missing from this
    listing. Going by the function header it presumably marks the
    atom as a metal - confirm against the original file.
 */
146  }
147  }
148  }
149 }
150 
151 
152 /************************************************************************/
153 /*>static void SetPDBAtomTypesWaterAndNucleotides(PDB *pdb)
154  --------------------------------------------------------
155 *//**
156  \param[in,out] *pdb PDB linked list
157 
158  Sets atom types for waters and nucleotides - i.e. modifies the
159  ATOM and HETATM settings
160 
161 - 21.07.15 Original By: ACRM
162 */
163 static void SetPDBAtomTypesWaterAndNucleotides(PDB *pdb)
164 {
165  PDB *p;
166 
167  /* Walk every atom: handle waters first, otherwise check ATOM residue names */
168  for(p=pdb; p!=NULL; NEXT(p))
169  {
 /* The water test is applied to every atom, whatever its current type */
170  if(ISWATER(p))
171  {
 /* NOTE(review): the statement for this branch is missing from this
    listing; presumably it marks the atom as water - confirm against
    the original file.
 */
173  }
174  else if(p->atomtype == ATOMTYPE_ATOM)
175  {
 /* Residue names treated as nucleotides. Only atoms currently typed
    ATOMTYPE_ATOM reach this test.

    NOTE(review): strncmp() is asked to compare 3 characters, but some
    literals below hold only 2. As written, those match only a resnam
    that ends right after the space. If this listing has collapsed
    runs of spaces, the original literals may have been space-padded
    to 3 characters - confirm before relying on them.
 */
176  if(!strncmp(p->resnam,"A ",3) ||
177  !strncmp(p->resnam,"C ",3) ||
178  !strncmp(p->resnam,"G ",3) ||
179  !strncmp(p->resnam,"I ",3) ||
180  !strncmp(p->resnam,"T ",3) ||
181  !strncmp(p->resnam,"Y ",3) ||
182  !strncmp(p->resnam,"U ",3) ||
183  !strncmp(p->resnam,"DA ",3) ||
184  !strncmp(p->resnam,"DC ",3) ||
185  !strncmp(p->resnam,"DT ",3) ||
186  !strncmp(p->resnam,"DG ",3) ||
187  !strncmp(p->resnam,"+A ",3) ||
188  !strncmp(p->resnam,"+C ",3) ||
189  !strncmp(p->resnam,"+G ",3) ||
190  !strncmp(p->resnam,"+I ",3) ||
191  !strncmp(p->resnam,"+T ",3) ||
192  !strncmp(p->resnam,"+Y ",3) ||
193  !strncmp(p->resnam,"+U ",3))
194  {
195  p->atomtype = ATOMTYPE_NUC;
196  }
197  }
198  }
199 
200 }
201 
202 
203 /************************************************************************/
204 /*>static void SetPDBAtomTypesModifiers(PDB *pdb)
205  ----------------------------------------------
206 *//**
207  \param[in,out] *pdb PDB linked list
208 
209  Updates HETATMs to indicate if they are bound to ATOMs - i.e. they
210  are residue modifiers
211 
212 - 21.07.15 Original By: ACRM
213 */
214 static void SetPDBAtomTypesModifiers(PDB *pdb)
215 {
216  PDB *p, *q;
217  int i;
218  BOOL doneMod;
219 
 /* NOTE(review): several statements and condition halves of this function
    are missing from this listing (the bodies of the first two branches,
    and the second operand and assignment of each branch in the do-loop).
    The comments below describe only what is visible.
 */

220  /* Now look for connections between HETATMs and ATOMs */
221  for(p=pdb; p!=NULL; NEXT(p))
222  {
223  if(p->atomtype == ATOMTYPE_HETATM)
224  {
 /* Visit each atom recorded in p's conect[] array */
225  for(i=0; i<p->nConect; i++)
226  {
227  q = p->conect[i];
 /* NOTE(review): p->atomtype was just required to equal
    ATOMTYPE_HETATM and nothing visible changes it, so the two
    tests below on p->atomtype look unreachable. The connected
    atom q is fetched but not tested here; q->atomtype may have
    been intended - confirm against the original file.
 */
228  if(p->atomtype == ATOMTYPE_ATOM)
229  {
231  }
232  else if(p->atomtype == ATOMTYPE_NUC)
233  {
235  }
236  }
237  }
238  }
239 
240  /* Now update all the HETATMs that are connected to MODPROT or
241  MODNUC
242  */
 /* Repeat full passes over the list until one completes without any
    branch setting doneMod, i.e. until nothing more changes.
 */
243  do
244  {
245  doneMod = FALSE;
246  for(p=pdb; p!=NULL; NEXT(p))
247  {
248  for(i=0; i<p->nConect; i++)
249  {
250  q = p->conect[i];
251 
 /* Each branch tests p's type; the second operand of each
    condition and the assignment it guards are missing here.
 */
252  if(p->atomtype == ATOMTYPE_MODPROT &&
254  {
256  doneMod = TRUE;
257  }
258  else if(p->atomtype == ATOMTYPE_MODNUC &&
260  {
262  doneMod = TRUE;
263  }
264  else if(p->atomtype == ATOMTYPE_BOUNDHET &&
266  {
268  doneMod = TRUE;
269  }
270  }
271  }
272  } while(doneMod);
273 }
274 
275 
276 /************************************************************************/
277 /*>static STRINGLIST *SetPDBAtomTypesNSResidues(PDB *pdb)
278  ------------------------------------------------------
279 *//**
280  \param[in,out] pdb PDB linked list
281  \return A STRINGLIST containing any warning
282  messages
283 
284  Goes through BOUNDHET atoms and changes them to Non-standard residue
285  atoms if they are linked via the backbone.
286 
287 - 23.03.99 Original By: ACRM
288 - 27.04.99 .O3* and .P.. were the wrong way round so NSNUC not being
289  identified!
290 - 18.06.99 Added BOOL return and check on chain being correct (to catch
291  1ubs) and force parameter
292 - 21.06.99 Force now works for individual issues rather than on/off
293  for everything
294 - 06.09.99 Further simple check for nucleotides. If a non-standard
295  residue (not recognised as a nucleotide) contains a
296  phosphorus and has a nucleotide on either side in the same
297  chain, then set it to a nonstandard nucleotide. Fixes 1gsg
298  (which is backbone only), 1ser (where the distance is too
299  long to get flagged as boundhet).
300 */
301 static STRINGLIST *SetPDBAtomTypesNSResidues(PDB *pdb)
302 {
 /* Residue window used by the loop below:
      res0 - residue handled on the previous iteration (NULL at first)
      res1 - residue being examined
      res2 - residue after res1, from blFindNextResidue()
      res3 - residue after res2; used as the end marker when scanning res2
    nsVal holds the replacement atom type chosen for res1, or 0 for none.
 */
303  STRINGLIST *warnings = NULL;
304  PDB *p, *q,
305  *res1 = NULL,
306  *res2 = NULL,
307  *res3 = NULL,
308  *res0 = NULL;
309  int nsVal = 0;
310 
311  for(res1=pdb; res1!=NULL; res1=res2)
312  {
313  /* Replacement non-standard value. 0 indicates not found to be
314  a non-standard residue
315  */
316  nsVal=0;
317 
 /* res1 is never NULL here (loop condition). res3 is refreshed only
    when res2 is non-NULL; otherwise it keeps its previous value.
 */
318  if(res1!=NULL) res2 = blFindNextResidue(res1);
319  if(res2!=NULL) res3 = blFindNextResidue(res2);
320 
321  /* If it's a bound het:
322  If bound to following N or preceding C, set type to NONSTDAA
323  If bound to following P or preceding O3*, set type to NONSTDNUC
324  */
 /* Only the type of the residue's first atom is tested */
325  if(res1->atomtype == ATOMTYPE_BOUNDHET)
326  {
 /* For each atom p of res1, look for a link into the next residue
    and, failing that, into the previous one. A hit sets nsVal and
    sets p to NULL, which the test after the searches uses to leave
    this loop before NEXT(p) is applied.
 */
327  for(p=res1; p!=res2; NEXT(p))
328  {
329  /* Search next residue */
330  for(q=res2; q!=res3; NEXT(q))
331  {
332  if(!strncmp(q->atnam,"N ",4))
333  {
334  if(blIsConected(p, q))
335  {
336  /* 18.06.99 Check they are in the same chain */
 /* A chain mismatch only produces a warning; the
    residue is still retyped below.
    NOTE(review): sprintf() writes into a fixed
    MAXBUFF-byte buffer with no length check.
 */
337  if(!PDBCHAINMATCH(p, q))
338  {
339  char buffer[MAXBUFF];
340  sprintf(buffer,"Warning: Apparent \
341 non-standard amino acid has different chain\n\
342  label from amino acid Nitrogen to which it is connected\n\
343  Residue %s %s%d%s\n",
344  res1->resnam,
345  res1->chain,
346  res1->resnum,
347  res1->insert);
348  warnings = blStoreString(warnings, buffer);
349  }
350 
351  nsVal = ATOMTYPE_NONSTDAA;
352  p=NULL;
353  break;
354  }
355  }
356  if(!strncmp(q->atnam,"P ",4))
357  {
358  if(blIsConected(p, q))
359  {
360  /* 18.06.99 Check they are in the same chain */
361  if(!PDBCHAINMATCH(p, q))
362  {
363  char buffer[MAXBUFF];
364  sprintf(buffer,"Warning: Apparent \
365 non-standard nucleotide has different chain\n\
366  label from nucleotide phosphorus to which it is connected\n\
367  Residue %s %s%d%s\n",
368  res1->resnam,
369  res1->chain,
370  res1->resnum,
371  res1->insert);
372  warnings = blStoreString(warnings, buffer);
373  }
374 
375  nsVal = ATOMTYPE_NONSTDNUC;
376  p=NULL;
377  break;
378  }
379  }
380  }
 /* Reached with p still valid only if the search above found
    nothing, since a hit there always sets nsVal non-zero.
 */
381  if(nsVal==0)
382  {
383  /* Search previous residue */
 /* res0 is NULL for the first residue, so this is skipped then */
384  for(q=res0; q!=NULL && q!=res1; NEXT(q))
385  {
386  if(!strncmp(q->atnam,"C ",4))
387  {
388  if(blIsConected(p, q))
389  {
390  /* 18.06.99 Check they are in the same chain */
391  if(!PDBCHAINMATCH(p, q))
392  {
393  char buffer[MAXBUFF];
394  sprintf(buffer,"Warning: Apparent \
395 non-standard amino acid has different chain\n\
396  label from amino acid Carbon to which it is connected\n\
397  Residue %s %s%d%s\n",
398  res1->resnam,
399  res1->chain,
400  res1->resnum,
401  res1->insert);
402  warnings = blStoreString(warnings, buffer);
403  }
404 
405  nsVal = ATOMTYPE_NONSTDAA;
406  p=NULL;
407  break;
408  }
409  }
410  if(!strncmp(q->atnam,"O3* ",4))
411  {
412  if(blIsConected(p, q))
413  {
414  /* 18.06.99 Check they are in the same chain */
415  if(!PDBCHAINMATCH(p, q))
416  {
417  char buffer[MAXBUFF];
418  sprintf(buffer,"Warning: Apparent \
419 non-standard nucleotide has different chain\n\
420  label from nucleotide O3* to which it is connected\n\
421  Residue %s %s%d%s\n",
422  res1->resnam,
423  res1->chain,
424  res1->resnum,
425  res1->insert);
426  warnings = blStoreString(warnings, buffer);
427  }
428  nsVal = ATOMTYPE_NONSTDNUC;
429  p=NULL;
430  break;
431  }
432  }
433  }
434  }
 /* p==NULL means a link was found: stop scanning res1's atoms */
435  if(p==NULL)
436  break;
437  }
438  }
439 
440  /* 06.09.99 Further simple check for nucleotides. If res1 is of
441  type ATOM with res0 or res2 in the same chain and of type
442  NUC, then we check that res1 contains a Phosphorus and, if
443  so, we assume res1 is a nonstandard nucleotide. Fixes 1gsg
444  (which is backbone only), 1ser (where the distance is too
445  long to get flagged as boundhet).
446  */
 /* As above, neighbour types are read from the first atom of res0 and
    res2. NONSTDNUC is accepted as well as NUC for the neighbour.
 */
447  if(!nsVal && /* Not done already */
448  (res1 != NULL) && (res1->atomtype == ATOMTYPE_ATOM) &&
449  (((res0 != NULL) && /* residue before */
450  ((res0->atomtype == ATOMTYPE_NUC) ||
451  (res0->atomtype == ATOMTYPE_NONSTDNUC)) &&
452  PDBCHAINMATCH(res0, res1)) ||
453  ((res2 != NULL) && /* residue after */
454  ((res2->atomtype == ATOMTYPE_NUC) ||
455  (res2->atomtype == ATOMTYPE_NONSTDNUC)) &&
456  PDBCHAINMATCH(res2, res1))))
457  {
 /* One atom in res1 with element exactly "P" is enough */
458  for(p=res1; p!=res2; NEXT(p))
459  {
460  if(!strcmp(p->element,"P"))
461  {
462  nsVal = ATOMTYPE_NONSTDNUC;
463  break;
464  }
465  }
466  }
467 
468  /* If we found one of these links then modify all atoms in
469  this residue
470  */
471  if(nsVal)
472  {
473  for(p=res1; p!=res2; NEXT(p))
474  {
475  p->atomtype = nsVal;
476  }
477  }
478 
 /* Slide the window: the loop header then advances res1 to res2 */
479  res0=res1;
480  }
481 
 /* NULL if no warning was stored */
482  return(warnings);
483 }
484 
485 
486 /************************************************************************/
487 /*>static void InitializePDBAtomTypes(PDB *pdb)
488  --------------------------------------------
489 *//**
490  \param[in,out] *pdb PDB linked list
491 
492  Initializes atom types based on ATOM or HETATM information
493 
494 - 21.07.15 Original By: ACRM
495 */
496 static void InitializePDBAtomTypes(PDB *pdb)
497 {
498  PDB *p;
499 
500  /* Initialize atom types based on record types */
501  for(p=pdb; p!=NULL; NEXT(p))
502  {
 /* NOTE(review): strncmp() is asked to compare 6 characters but the
    literal below holds 5. As written it matches only a record_type
    that ends right after the space. If this listing has collapsed runs
    of spaces, the original literal may have been padded to 6
    characters - confirm.
 */
503  if(!strncmp(p->record_type, "ATOM ", 6))
504  {
505  p->atomtype = ATOMTYPE_ATOM;
506  }
507  else if(!strncmp(p->record_type, "HETATM", 6))
508  {
 /* NOTE(review): the statement for this branch is missing from this
    listing; presumably it assigns the HETATM atom type - confirm.
 */
510  }
511  else
512  {
 /* NOTE(review): the statement for this fallback branch is also
    missing; presumably it assigns an undefined/unknown type - confirm.
 */
514  }
515  }
516 }
#define ATOMTYPE_MODNUC
Definition: pdb.h:599
#define ATOMTYPE_BOUNDHET
Definition: pdb.h:605
#define ATOMTYPE_MODPROT
Definition: pdb.h:598
Include file for PDB routines.
#define ATOMTYPE_NONSTDAA
Definition: pdb.h:600
int resnum
Definition: pdb.h:310
short BOOL
Definition: SysDefs.h:64
#define NULL
Definition: array2.c:99
#define MAXBUFF
Definition: atomtype.c:70
Definition: pdb.h:298
#define FALSE
Definition: macros.h:223
#define NEXT(x)
Definition: macros.h:249
STRINGLIST * blSetPDBAtomTypes(PDB *pdb)
Definition: atomtype.c:95
char record_type[8]
Definition: pdb.h:315
int nConect
Definition: pdb.h:312
char atnam[8]
Definition: pdb.h:316
char resnam[8]
Definition: pdb.h:319
char element[8]
Definition: pdb.h:322
#define ATOMTYPE_ATOM
Definition: pdb.h:596
#define PDBCHAINMATCH(p, q)
Definition: pdb.h:498
BOOL blIsConected(PDB *p, PDB *q)
Definition: BuildConect.c:629
#define TRUE
Definition: macros.h:219
#define ATOMTYPE_NONSTDNUC
Definition: pdb.h:601
#define ATOMTYPE_WATER
Definition: pdb.h:604
#define ATOMTYPE_HETATM
Definition: pdb.h:602
#define ATOMTYPE_UNDEF
Definition: pdb.h:595
STRINGLIST * blStoreString(STRINGLIST *StringList, char *string)
Definition: StoreString.c:131
Header file for general purpose routines.
#define ISWATER(z)
Definition: pdb.h:487
PDB * blFindNextResidue(PDB *pdb)
#define ATOMTYPE_METAL
Definition: pdb.h:603
#define ATOMTYPE_NUC
Definition: pdb.h:597
int atomtype
Definition: pdb.h:314
struct pdb_entry * conect[MAXCONECT]
Definition: pdb.h:308
char chain[blMAXCHAINLABEL]
Definition: pdb.h:321
char insert[8]
Definition: pdb.h:320