Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
AtomNameMatch.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file AtomNameMatch.c
5 
6  \version V1.8
7  \date 07.07.14
8  \brief Tests for matching atom names with wild cards
9 
10  \copyright (c) UCL / Dr. Andrew C. R. Martin 1993-9
11  \author Dr. Andrew C. R. Martin
12  \par
13  Institute of Structural & Molecular Biology,
14  University College London,
15  Gower Street,
16  London.
17  WC1E 6BT.
18  \par
19  andrew@bioinf.org.uk
20  andrew.martin@ucl.ac.uk
21 
22 **************************************************************************
23 
24  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
25  according to the conditions laid out in the accompanying file
26  COPYING.DOC.
27 
28  The code may be modified as required, but any modifications must be
29  documented so that the person responsible can be identified.
30 
31  The code may not be sold commercially or included as part of a
32  commercial product except as described in the file COPYING.DOC.
33 
34 **************************************************************************
35 
36  Description:
37  ============
38 
39 
40 **************************************************************************
41 
42  Usage:
43  ======
44 
45 **************************************************************************
46 
47  Revision History:
48  =================
49 - V1.0 01.03.94 Original
50 - V1.1 07.07.95 Now non-destructive
51 - V1.2 17.07.95 Now checks that a number was specified as part of the
52  spec. and returns a BOOL
53 - V1.3 23.10.95 Moved FindResidueSpec() from PDBList.c
54 - V1.4 08.02.96 Added FindResidue() and changed FindResidueSpec() to
55  use it
56 - V1.5 23.07.96 Added AtomNameMatch() and LegalAtomSpec()
57 - V1.6 18.03.98 Added option to include a . to separate chain and
58  residue number so numeric chain names can be used
59 - V1.7 11.10.99 Allow a . to be used to start a number (such that the
60  default blank chain name is used). Allows negative
61  residue numbers
62 - V1.8 07.07.14 Use bl prefix for functions By: CTP
63 
64 *************************************************************************/
65 /* Doxygen
66  -------
67  #GROUP Handling PDB Data
68  #SUBGROUP Miscellaneous functions
69  #FUNCTION blAtomNameMatch()
70  Tests whether an atom name matches an atom name specification.
71  ? or % is used to match a single character
72  * is used to match any trailing characters; it may not be used for
73  leading characters or in the middle of a specification (e.g. *B*,
74  C*2 are both illegal).
75  Wildcards may be escaped with a backslash.
76 
77  #FUNCTION blAtomNameRawMatch()
78  Tests whether an atom name matches an atom name specification
79  having been given a 'raw' atom name rather than the
80  massaged one. i.e. " CA " is C-alpha, "CA " is Calcium
81  Normally it checks against the second character onwards unless the
82  spec starts with a < in which case it checks from the beginning of
83  the string.
84 */
85 /************************************************************************/
86 /* Includes
87 */
88 #include <ctype.h>
89 #include <stdio.h>
90 #include <string.h>
91 
92 #include "macros.h"
93 #include "SysDefs.h"
94 #include "pdb.h"
95 
96 /************************************************************************/
97 /* Defines and macros
98 */
99 
100 /************************************************************************/
101 /* Globals
102 */
103 
104 /************************************************************************/
105 /* Prototypes
106 */
107 
108 /************************************************************************/
/*>BOOL blAtomNameMatch(char *atnam, char *spec, BOOL *ErrorWarn)
   --------------------------------------------------------------
*//**

   \param[in]      *atnam      The atom name to test
   \param[in]      *spec       The atom specification
   \param[in,out]  *ErrorWarn  On input, if TRUE, this routine will
                               indicate errors.
                               On output, indicates whether there
                               was an error.
                               Note that you must be careful to supply
                               an lvalue here, you can't just use TRUE
                               or FALSE since it's modified on return.
                               NULL is allowed if you don't care about
                               errors.
   \return                     TRUE if the atom name matches the
                               specification, FALSE otherwise

   Tests whether an atom name matches an atom name specification.
   ? or % is used to match a single character
   * is used to match any trailing characters; it may not be used for
   leading characters or in the middle of a specification (e.g. *B*,
   C*2 are both illegal).
   Wildcards may be escaped with a backslash.

   For example: C*   matches all carbon atoms,
                O5\* matches an atom called O5*
                ?B*  matches all beta atoms

   The end of the atom name string is treated like space padding, so a
   space-padded specification matches an unpadded atom name, and a
   single-character wildcard is satisfied by the (implied) padding.
   Neither string is ever read beyond its terminating NUL.

   An illegal use of * is only detected when ErrorWarn is not NULL;
   the routine still returns TRUE in that case, with *ErrorWarn set to
   TRUE.

   A backslash which is the last character of the specification has
   nothing to escape and is compared as a literal backslash.

-  23.07.96 Original   By: ACRM
-  07.07.14 Use bl prefix for functions By: CTP
*/
BOOL blAtomNameMatch(char *atnam, char *spec, BOOL *ErrorWarn)
{
   char *specp  = spec,
        *atnamp = atnam;

   /* Step through the specification and the atom name. The pointers
      are advanced explicitly so that neither can be moved beyond its
      terminating NUL
   */
   while(*specp)
   {
      switch(*specp)
      {
      case '\\':
         /* A backslash escapes the next character of the specification,
            so step on to that character and compare it literally
            below. If the backslash is the final character there is
            nothing to escape, so it is compared literally itself
         */
         if(*(specp+1) != '\0')
            specp++;
         break;
      case '?':
      case '%':
         /* Single-character wildcard: consume one character of the
            atom name. If the atom name is already exhausted it is
            treated as space padded, so its pointer stays on the
            terminator
         */
         specp++;
         if(*atnamp != '\0')
            atnamp++;
         continue;
      case '*':
         /* Matches the rest of the string                              */
         if(ErrorWarn != NULL)
         {
            /* Only end of string or a space may follow the *           */
            if(*(specp+1) && *(specp+1) != ' ')
            {
               if(*ErrorWarn)
               {
                  fprintf(stderr,"Error in atom wildcard: %s\n",spec);
               }
               *ErrorWarn = TRUE;
            }
            else
            {
               *ErrorWarn = FALSE;
            }
         }
         return(TRUE);
      default:
         break;
      }

      /* If the specification has reached a space and the atom name has
         reached a space or the end of the string then both have ended,
         so the names match. This must be tested before the mismatch
         test, otherwise a space in the specification can never pair
         with the end of a shorter (unpadded) atom name
      */
      if((*specp == ' ') && ((*atnamp == ' ') || (*atnamp == '\0')))
      {
         if(ErrorWarn != NULL)
            *ErrorWarn = FALSE;
         return(TRUE);
      }

      /* If there is a mismatch return FALSE. This also covers the atom
         name running out before the specification
      */
      if(*specp != *atnamp)
      {
         if(ErrorWarn != NULL)
            *ErrorWarn = FALSE;
         return(FALSE);
      }

      /* The characters are equal and *specp is not NUL, so neither
         pointer is on a terminator and both may safely advance
      */
      specp++;
      atnamp++;
   }

   /* There have been no errors and we don't need the error flag again  */
   if(ErrorWarn != NULL)
      *ErrorWarn = FALSE;

   /* The specification has run out, see if there are any atom
      characters left
   */
   if(*atnamp && *atnamp!=' ')
      return(FALSE);

   /* Both have ended OK, so the names match                            */
   return(TRUE);
}
217 
218 
219 /************************************************************************/
220 /*>BOOL blAtomNameRawMatch(char *atnam, char *spec, BOOL *ErrorWarn)
221  -----------------------------------------------------------------
222 *//**
223 
224  \param[in] *atnam The atom name to check
225  \param[in] *spec The atom specification
226  \param[in,out] *ErrorWarn On input, if TRUE, this routine will
227  indicate errors.
228  On output, indicates whether there
229  was an error.
230  Note that you must be careful to supply
231  an lvalue here, you can't just use TRUE
232  or FALSE since it's modified on return.
233  NULL is allowed if you don't care about
234  errors.
235 
236  Tests whether an atom name matches an atom name specification.
237 
238  This version should be given the raw atom name rather than the
239  massaged one. i.e. " CA " is C-alpha, "CA " is Calcium
240 
241  Normally it checks against the second character onwards unless the
242  spec starts with a < in which case it checks from the beginning of
243  the string
244 
245  Written as a wrapper to AtomNameMatch()
246 
247 - 15.02.01 Original By: ACRM
248 - 07.07.14 Use bl prefix for functions By: CTP
249 */
250 BOOL blAtomNameRawMatch(char *atnam, char *spec, BOOL *ErrorWarn)
251 {
252  /* If atom spec starts with a < then just bump the spec pointer,
253  otherwise bump the atom name pointer since we will look from the
254  second character of the atom name
255  */
256  if(*spec == '<')
257  {
258  spec++;
259  }
260  else
261  {
262  atnam++;
263  }
264 
265  return(blAtomNameMatch(atnam, spec, ErrorWarn));
266 }
267 
268 #ifdef TEST_MAIN
int main(int argc, char **argv)
{
   /* Raw atom name that every specification is tried against          */
   char atnam[8] = " CA*";

   /* Specifications to try, in the order they are reported             */
   char specs[][8] = {"CA", "<CA", "C*", "CA*", "CA?", "C\\*", "C?"};
   int  nspecs = (int)(sizeof(specs) / sizeof(specs[0])),
        i;

   printf("Atom name '%s':\n", atnam);

   /* Report YES or NO for each specification in turn                   */
   for(i=0; i<nspecs; i++)
   {
      const char *verdict = "NO";

      if(blAtomNameRawMatch(atnam, specs[i], NULL))
         verdict = "YES";

      printf("'%s' matches? %s\n", specs[i], verdict);
   }

   return(0);
}
299 #endif
int main(int argc, char **argv)
Definition: test.c:4
Include file for PDB routines.
short BOOL
Definition: SysDefs.h:64
#define NULL
Definition: array2.c:99
BOOL blAtomNameRawMatch(char *atnam, char *spec, BOOL *ErrorWarn)
#define FALSE
Definition: macros.h:223
Useful macros.
#define TRUE
Definition: macros.h:219
System-type variable type definitions.
BOOL blAtomNameMatch(char *atnam, char *spec, BOOL *ErrorWarn)