Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
ParseRes.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file ParseRes.c
5 
6  \version V1.15
7  \date 11.08.16
8  \brief Parse a residue specification
9 
10  \copyright (c) UCL / Dr. Andrew C. R. Martin 1993-2016
11  \author Dr. Andrew C. R. Martin
12  \par
13  Institute of Structural & Molecular Biology,
14  University College London,
15  Gower Street,
16  London.
17  WC1E 6BT.
18  \par
19  andrew@bioinf.org.uk
20  andrew.martin@ucl.ac.uk
21 
22 **************************************************************************
23 
24  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
25  according to the conditions laid out in the accompanying file
26  COPYING.DOC.
27 
28  The code may be modified as required, but any modifications must be
29  documented so that the person responsible can be identified.
30 
31  The code may not be sold commercially or included as part of a
32  commercial product except as described in the file COPYING.DOC.
33 
34 **************************************************************************
35 
36  Description:
37  ============
38 
39 
40 **************************************************************************
41 
42  Usage:
43  ======
44 
45 **************************************************************************
46 
47  Revision History:
48  =================
49 - V1.0 01.03.94 Original
50 - V1.1 07.07.95 Now non-destructive
51 - V1.2 17.07.95 Now checks that a number was specified as part of the
52  spec. and returns a BOOL
53 - V1.3 23.10.95 Moved FindResidueSpec() from PDBList.c
54 - V1.4 08.02.96 Added FindResidue() and changed FindResidueSpec() to
55  use it
56 - V1.5 23.07.96 Added AtomNameMatch() and LegalAtomSpec()
57 - V1.6 18.03.98 Added option to include a . to separate chain and
58  residue number so numeric chain names can be used
59 - V1.7 11.10.99 Allow a . to be used to start a number (such that the
60  default blank chain name is used). Allows negative
61  residue numbers
62 - V1.8 29.09.05 Moved ParseResSpec() into DoParseResSpec() with extra
63  param and added wrappers for ParseResSpec() and
64  ParseResSpecNoUpper() (Changes by Tony Lewis) By: TL
65 - V1.9 05.01.12 Default behaviour of ParseResSpec() is now not to
66  upcase the chain label - there are now too many PDB
67  entries with lower case chain names for this to be
68  sensible. By: ACRM
69 - V1.10 12.10.12 insert is now a properly terminated string when there is
70  no insert
71 - V1.11 28.08.13 chain is now a properly terminated string
72 - V1.12 26.02.14 Parsing handles multi-letter chains. By: CTP
73 - V1.13 07.07.14 Use bl prefix for functions By: CTP
74 - V1.14 10.03.15 Added blPrintResSpecHelp() By: ACRM
75  Removed blParseResSpecNoUpper() since blParseResSpec()
76  now does this
77 - V1.15 11.08.16 Added blBuildResSpec()
78 
79 *************************************************************************/
80 /* Doxygen
81  -------
82  #GROUP Handling PDB Data
83  #SUBGROUP Miscellaneous functions
84  #FUNCTION blParseResSpec()
85  Splits up a residue specification of the form
86  [c][.]num[i]
87  into chain, resnum and insert. Chain and insert code are not up-cased
88 
89  #FUNCTION blDoParseResSpec()
90  Splits up a residue specification of the form
91  [c][.]num[i]
92  into chain, resnum and insert. Gives control over up-casing
93 
94  #FUNCTION blPrintResSpecHelp()
95  Prints a help message on the residue specification format to make
96  help messages more consistent
97 
98  #FUNCTION blBuildResSpec()
99  Creates a residue specification string
100 */
101 /************************************************************************/
102 /* Includes
103 */
104 #include <ctype.h>
105 #include <stdio.h>
106 #include <string.h>
107 
108 #include "macros.h"
109 #include "SysDefs.h"
110 #include "pdb.h"
111 
112 /************************************************************************/
113 /* Defines and macros
114 */
115 
116 /************************************************************************/
117 /* Globals
118 */
119 
120 /************************************************************************/
121 /* Prototypes
122 */
123 
124 /************************************************************************/
125 /*>BOOL blParseResSpec(char *spec, char *chain, int *resnum, char *insert)
126  -----------------------------------------------------------------------
127 *//**
128 
129  \param[in] *spec Residue specification
130  \param[out] *chain Chain label
131  \param[out] *resnum Residue number
132  \param[out] *insert Insert label
133  \return Success?
134 
135  Note that chain and insert must be arrays of at least 2 characters,
136  not character pointers
137 
138  Splits up a residue specification of the form
139  [c][.]num[i]
140  into chain, resnum and insert. Chain and insert are optional and will
141  be set to spaces if not specified. The residue specification is NOT
142  converted to upper case before processing (see the 05.01.12 note below).
143 
144  Moved the code that was here to a new function, DoParseResSpec()
145  and made this function just call that new function. See
146  DoParseResSpec()'s comments for notes on previous changes. This
147  move is to allow the underlying function to have an extra parameter
148  to specify whether or not the residue specification should be upper
149  cased (without affecting code that calls this function).
150 
151 - 29.09.05 Original By: TL
152 - 05.01.12 Now behaves the same as ParseResSpecNoUpper(). There are now
153  too many PDB files with lower case chain names (e.g. 1gav,
154  3n9r, etc.) for the old default behaviour of up-casing
155  everything. By: ACRM
156 - 07.07.14 Use bl prefix for functions By: CTP
157 */
158 BOOL blParseResSpec(char *spec, char *chain, int *resnum, char *insert)
159 {
160  return blDoParseResSpec(spec, chain, resnum, insert, FALSE);
161 }
162 
163 
164 /************************************************************************/
165 /*>BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum,
166  char *insert, BOOL uppercaseresspec)
167  -------------------------------------------------------------
168 *//**
169 
170  \param[in] *inSpec Residue specification
171  \param[out] *chain Chain label
172  \param[out] *resnum Residue number
173  \param[out] *insert Insert label
174  \param[in] uppercaseresspec Convert spec to upper case.
175  \return Success?
176 
177  Note that chain and insert must be arrays of at least 2 characters,
178  not character pointers
179 
180  Splits up a residue specification of the form
181  [c][.]num[i]
182  into chain, resnum and insert. Chain and insert are optional and will
183  be set to spaces if not specified. If uppercaseresspec equals TRUE,
184  the spec is upper cased before processing
185 
186  Multi-letter chain IDs can be parsed. Additionally, chain IDs with
187  numerical characters can be parsed if a period is used to separate the
188  chain from the residue number.
189 
190 - 21.07.93 Original By: ACRM
191 - 17.07.95 Added BOOL return
192 - 18.03.98 Added option to include a . to separate chain and residue
193  number so numeric chain names can be used
194 - 29.09.05 Moved this code from ParseResSpec() to DoParseResSpec()
195  and made that function just call this new function.
196  This move is to allow this underlying function to have an
197  extra parameter to specify whether or not the residue
198  specification should be upper cased (without affecting code
199  that calls the old function). By: TL
200 - 12.10.12 insert is now a properly terminated string when there is
201  no insert
202 - 28.08.12 chain is now a properly terminated string
203  The input specification is now copied so that actual strings
204  can be passed into the routine as opposed to string delimited
205  variables. This also removes the need for restoring the
206  string which has now been removed
207 - 26.02.14 Parsing handles multi-letter chains and numerical chain IDs.
208  The "Extract chain from spec" section was re-written.
209  If the period separator between the chain id and the residue
210  number is absent then the chain id is set from any non-numeric
211  lead characters. By: CTP
212 - 07.07.14 Use bl prefix for functions By: CTP
213 */
214 BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum,
215  char *insert, BOOL uppercaseresspec)
216 {
217  char *ptr,
218  *ptr2,
219  spec[64];
220  BOOL /* DoRestore = FALSE, */
221  retval = TRUE,
222  chain_found = FALSE;
223  int i;
224 
225  strncpy(spec, inSpec, 64);
226 
227  /* 11.10.99 Default resnum of 0 */
228  *resnum = 0;
229 
230  /* Upper case the residue specification if it has been requested */
231  if (uppercaseresspec == TRUE)
232  {
233  UPPER(spec);
234  }
235  KILLLEADSPACES(ptr, spec);
236 
237  /* Extract chain from spec. Added 26.02.14 By: CTP */
238 
239  /* Extract chain from spec (dot format) */
240  for(ptr2=ptr,i=0;*ptr2;ptr2++,i++)
241  {
242  if(*ptr2 == '.')
243  {
244  /* set chain */
245  if(i > 0)
246  {
247  strncpy(chain,ptr,i);
248  chain[i] = '\0';
249  }
250  else
251  {
252  strcpy(chain," ");
253  }
254 
255  chain_found = TRUE;
256  ptr = ptr2 + 1; /* update start point */
257  break;
258  }
259  }
260 
261  /* Extract chain from spec (non-numeric lead characters) */
262  if(chain_found == FALSE)
263  {
264  for(ptr2=ptr,i=0;*ptr2;ptr2++,i++)
265  {
266  if(!isdigit(*ptr2) && (*ptr2 != '-'))
267  {
268  chain[i] = *ptr2;
269  chain[i+1] = '\0';
270  chain_found = TRUE;
271  ptr = ptr2 + 1; /* update start point */
272  }
273  else
274  {
275  break;
276  }
277  }
278  }
279 
280  /* Extract chain from spec (set chain to space) */
281  if(chain_found == FALSE)
282  {
283  strcpy(chain," ");
284  }
285 
286 
287  /* Extract insert from spec */
288  insert[0] = ' ';
289  insert[1] = '\0'; /* Added 12.10.12 */
290 
291  for(ptr2 = ptr; *ptr2; ptr2++)
292  {
293  /* 11.10.99 Now also checks that it isn't a - as the first
294  character
295  */
296  if(!isdigit(*ptr2) && ((ptr2!=ptr)||(*ptr2 != '-')))
297  {
298  insert[0] = *ptr2;
299  insert[1] = '\0';
300  *ptr2 = '\0';
301 /* DoRestore = TRUE; */
302  break;
303  }
304  }
305 
306  /* Extract residue number from spec */
307  if(sscanf(ptr,"%d",resnum) == 0)
308  retval = FALSE;
309 
310 /* if(DoRestore) */
311 /* { */
312  /* V1.1: Restore the original string */
313 /* *ptr2 = *insert; */
314 /* } */
315 
316  return(retval);
317 }
318 
319 
320 /************************************************************************/
321 /*>void blPrintResSpecHelp(FILE *fp)
322  ---------------------------------
323  \param[in] *fp File pointer
324 
325  Simply prints a help message on how to use a residue specifier. Makes
326  help messages from programs more consistent.
327 
328  10.03.15 Original By: ACRM
329 */
void blPrintResSpecHelp(FILE *fp)
{
   /* Writes the standard four-line description of the residue
      specification format to fp, emitted as one string made of
      adjacent literals.
   */
   static const char helpText[] =
      "resspec is a residue specification of the form "
      "[c[.]]nnn[i] where c is\n"
      "a (multi-character) chain label optionally followed by "
      "a '.' (required\n"
      "if the chain label is numeric), nnn is a residue number "
      "and i is an \n"
      "optional insert code.\n";

   fputs(helpText, fp);
}
340 
341 
342 /************************************************************************/
343 /*>void blBuildResSpec(PDB *p, char *resspec)
344  ------------------------------------------
345 *//**
346  \param[in] *p PDB record pointer
347 
348  Builds a residue specification string for a PDB record
349 
350 - 11.08.16 Original By: ACRM
351 */
352 void blBuildResSpec(PDB *p, char *resspec)
353 {
354  int chainLabelLen = strlen(p->chain);
355  char format[16];
356 
357 
358  if((chainLabelLen > 1) || isdigit(p->chain[chainLabelLen-1]))
359  {
360  strcpy(format, "%s.%d%s");
361  }
362  else
363  {
364  strcpy(format, "%s%d%s");
365  }
366 
367  sprintf(resspec, format, p->chain, p->resnum,
368  ((strlen(p->insert) && strcmp(p->insert, " "))?p->insert:""));
369 }
370 
Include file for PDB routines.
int resnum
Definition: pdb.h:310
short BOOL
Definition: SysDefs.h:64
Definition: pdb.h:298
void blBuildResSpec(PDB *p, char *resspec)
Definition: ParseRes.c:352
#define FALSE
Definition: macros.h:223
Useful macros.
BOOL blDoParseResSpec(char *inSpec, char *chain, int *resnum, char *insert, BOOL uppercaseresspec)
Definition: ParseRes.c:214
void blPrintResSpecHelp(FILE *fp)
Definition: ParseRes.c:330
#define TRUE
Definition: macros.h:219
#define KILLLEADSPACES(y, x)
Definition: macros.h:408
System-type variable type definitions.
#define UPPER(x)
Definition: macros.h:390
char chain[blMAXCHAINLABEL]
Definition: pdb.h:321
BOOL blParseResSpec(char *spec, char *chain, int *resnum, char *insert)
Definition: ParseRes.c:158
char insert[8]
Definition: pdb.h:320