Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
ResolPDB.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file ResolPDB.c
5 
6  \version V1.9
7  \date 02.03.15
8  \brief Get resolution and R-factor information out of a PDB file
9 
10  \copyright (c) UCL / Dr. Andrew C.R. Martin, 1994-2014
11  \author Dr. Andrew C. R. Martin
12  \par
13  Institute of Structural & Molecular Biology,
14  University College London,
15  Gower Street,
16  London.
17  WC1E 6BT.
18  \par
19  andrew@bioinf.org.uk
20  andrew.martin@ucl.ac.uk
21 
22 **************************************************************************
23 
24  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
25  according to the conditions laid out in the accompanying file
26  COPYING.DOC.
27 
28  The code may be modified as required, but any modifications must be
29  documented so that the person responsible can be identified.
30 
31  The code may not be sold commercially or included as part of a
32  commercial product except as described in the file COPYING.DOC.
33 
34 **************************************************************************
35 
36  Description:
37  ============
38 
39 
40 **************************************************************************
41 
42  Usage:
43  ======
44 
45  See documentation for details
46 
47 **************************************************************************
48 
49  Revision History:
50  =================
51 - V1.0 28.02.94 Original
52 - V1.1 18.03.94 Removed extraneous printf() statement
53 - V1.2 17.07.96 Added check for EXPERIMENT TYPE : THEORETICAL MODEL
54  Fixed bug in searching for MODEL or NMR info
55 - V1.3 27.06.97 Added handling of RESOLUTION records which point you
56  to another record for the experiment type.
57  Fixed EXPERIMENT TYPE to look for NMR as well
58  Looks for EXPDTA NMR record
59 - V1.4 23.03.98 Added check that RESOLUTION record is in a REMARK 2
60 - V1.5 08.02.99 GetResolPDB() now a wrapper to GetExptl() which
61  now parses structured REMARK3 blocks and also returns
62  the Free R. Initialise some variables to 0.0
63 - V1.6 30.05.02 Incorporated changes from Inpharmatica - now finds
64  Electron diffraction as an experimental type. Handles
65  files without REMARK 2 correctly
66 - V1.7 13.12.12 Complete re-implementation of GetExptl() for remediated
67  PDB files. Old version for old PDB files available as
68  GetExptlOld()
69 - V1.8 07.07.14 Use bl prefix for functions By: CTP
70 - V1.9 02.03.15 Renamed blGetExptl() to blGetExptlPDB()
71  Moved ReadData() out from blGetExptlPDB()
72  Added blGetExptlWholePDB()
73 
74 *************************************************************************/
75 /* Doxygen
76  -------
77  #GROUP Handling PDB Data
78  #SUBGROUP File IO
79  #FUNCTION blGetResolPDB()
80  Attempts to obtain resolution and R-factor information
81  out of a PDB file. Does not provide R-free - use of blGetExptlPDB() is
82  recommended.
83 
84  #FUNCTION blGetExptlPDB()
85  This routine attempts to obtain resolution, R-factor, R-Free and
86  experiment type information out of a PDB file.
87 
88  #FUNCTION blGetResolWholePDB()
89  Attempts to obtain resolution and R-factor information out of the
90  headers stored in a WHOLEPDB structure. Does not provide R-free -
91  use of blGetExptlWholePDB() is recommended.
92 
93  #FUNCTION blGetExptlWholePDB()
94  This routine attempts to obtain resolution, R-factor, R-Free and
95  experiment type information out of PDB headers stored in a WHOLEPDB
96  structure.
97 
98  #FUNCTION blReportStructureType()
99  Returns structure type description from a numeric representation
100 
101 
102 
103  #GROUP General Programming
104  #SUBGROUP File IO
105 
106  #FUNCTION FindNextNumber()
107  Find the next number which occurs in a file within a specified number
108  of lines.
109 */
110 /************************************************************************/
111 /* Includes
112 */
113 #include <stdio.h>
114 #include <string.h>
115 #include "pdb.h"
116 #include "macros.h"
117 
118 /************************************************************************/
119 /* Defines and macros
120 */
121 #define MAXBUFF 160
122 
123 /************************************************************************/
124 /* Globals
125 */
126 
127 /************************************************************************/
128 /* Prototypes
129 */
130 static BOOL HasText(char *ptr, char *hasWords, char *notWords);
131 static int SetStrucType(char *ptr);
132 static REAL GetNumberAfterColon(char *ptr);
133 static BOOL FindNextNumber(char *buffer, FILE *fp, int nlines, int nskip,
134  int ncheck, REAL *value);
135 static void ReadData(char *buffer, REAL *resolution, REAL *RFactor,
136  REAL *FreeR, int *StrucType);
137 
138 
139 /************************************************************************/
140 /*>BOOL blGetResolPDB(FILE *fp, REAL *resolution, REAL *RFactor,
141  int *StrucType)
142  -------------------------------------------------------------
143 *//**
144 
145  \param[in] *fp PDB file pointer
146  \param[out] *resolution The resolution (0.0 if not applicable)
147  \param[out] *RFactor The R-factor (0.0 if not found)
148  \param[out] *StrucType Structure type:
149  STRUCTURE_TYPE_XTAL
150  STRUCTURE_TYPE_NMR
151  STRUCTURE_TYPE_MODEL
152  STRUCTURE_TYPE_UNKNOWN
153  \return TRUE if resolution found or valid
154  structure type found
155 
156  This routine attempts to obtain resolution and R-factor information
157  out of a PDB file.
158  It returns TRUE or FALSE to indicate whether valid information was
159  found. Resolution-not-applicable structures then have the resolution
160  set to zero.
161 
162  N.B.
163  The resolution information returned by the routine is reliable; the
164  R-factor information is stored in so many forms that it is
165  difficult to read without some form of natural language parsing, but
166  we manage to handle most situations.
167  The routine assumes the R-factor to be the first number after the
168  words `R-value' (or one of the other keys - see the case statement in
169  the code for the valid keywords). Thus we cannot handle records of the
170  form:
171  THE R-VALUE FOR 7142 REFLECTIONS BETWEEN 10.0 AND 1.97 ANGSTROMS
172  REFINEMENT CYCLE 73 IS 0.254.
173  as appears in entries such as 1LZT. Here, the first number is
174  the number of reflections. There is thus a kludge which sets the
175  R-factor to zero if it was read as greater than 0.5 to avoid
176  this situation. In these cases, we lose the R-factor information.
177  This occurs in approx 3.5% of the 1XXX PDB entries.
178 
179 - 25.02.94 Original By: ACRM
180 - 28.02.94 Added " R = " and check that VALUE wasn't B-VALUE
181 - 17.07.96 Added check for EXPERIMENT TYPE : THEORETICAL MODEL
182  Also fixed bug in searching REMARK record
183 - 27.06.97 Added handling of RESOLUTION records which point you to
184  another record for the experiment type.
185  Fixed some calls to FindNextNumber() which were checking
186  an 80 character width
187  Fixed EXPERIMENT TYPE to look for NMR as well
188  Looks for EXPDTA NMR record
189 - 23.03.98 Added check that RESOLUTION record is in a REMARK 2
190 - 08.02.99 Now a wrapper to GetExptl() which also returns FreeR
191 - 07.07.14 Use bl prefix for functions By: CTP
192 */
193 BOOL blGetResolPDB(FILE *fp, REAL *resolution, REAL *RFactor,
194  int *StrucType)
195 {
196  REAL FreeR;
197 
198  return(blGetExptlPDB(fp, resolution, RFactor, &FreeR, StrucType));
199 }
200 
201 
202 /************************************************************************/
203 /*>BOOL blGetResolWholePDB(WHOLEPDB *wpdb, REAL *resolution,
204  REAL *RFactor, int *StrucType)
205  -------------------------------------------------------------
206 *//**
207 
208  \param[in] *wpdb WHOLEPDB structure pointer
209  \param[out] *resolution The resolution (0.0 if not applicable)
210  \param[out] *RFactor The R-factor (0.0 if not found)
211  \param[out] *StrucType Structure type:
212  STRUCTURE_TYPE_XTAL
213  STRUCTURE_TYPE_NMR
214  STRUCTURE_TYPE_MODEL
215  STRUCTURE_TYPE_UNKNOWN
216  \return TRUE if resolution found or valid
217  structure type found
218 
219  This routine attempts to obtain resolution and R-factor information
220  out of a WHOLEPDB structure.
221  It returns TRUE or FALSE to indicate whether valid information was
222  found. Resolution-not-applicable structures then have the resolution
223  set to zero.
224 
225  N.B.
226  The resolution information returned by the routine is reliable; the
227  R-factor information is stored in so many forms that it is
228  difficult to read without some form of natural language parsing, but
229  we manage to handle most situations.
230  The routine assumes the R-factor to be the first number after the
231  words `R-value' (or one of the other keys - see the case statement in
232  the code for the valid keywords). Thus we cannot handle records of the
233  form:
234  THE R-VALUE FOR 7142 REFLECTIONS BETWEEN 10.0 AND 1.97 ANGSTROMS
235  REFINEMENT CYCLE 73 IS 0.254.
236  as appears in entries such as 1LZT. Here, the first number is
237  the number of reflections. There is thus a kludge which sets the
238  R-factor to zero if it was read as greater than 0.5 to avoid
239  this situation. In these cases, we lose the R-factor information.
240  This occurs in approx 3.5% of the 1XXX PDB entries.
241 
242 - 02.03.15 Original By: ACRM
243 */
244 BOOL blGetResolWholePDB(WHOLEPDB *wpdb, REAL *resolution, REAL *RFactor,
245  int *StrucType)
246 {
247  REAL FreeR;
248 
249  return(blGetExptlWholePDB(wpdb, resolution, RFactor, &FreeR, StrucType));
250 }
251 
252 
253 /************************************************************************/
254 /*>BOOL blGetExptlPDB(FILE *fp, REAL *resolution, REAL *RFactor,
255  REAL *FreeR, int *StrucType)
256  -------------------------------------------------------------
257 *//**
258 
259  \param[in] *fp PDB file pointer
260  \param[out] *resolution The resolution (0.0 if not applicable)
261  \param[out] *RFactor The R-factor (0.0 if not found)
262  \param[out] *FreeR The Free R-factor (0.0 if not found)
263  \param[out] *StrucType Structure type:
264  STRUCTURE_TYPE_XTAL
265  STRUCTURE_TYPE_NMR
266  STRUCTURE_TYPE_MODEL
267  STRUCTURE_TYPE_UNKNOWN
268  \return TRUE if resolution found or valid
269  structure type found
270 
271  This routine attempts to obtain resolution and R-factor information
272  out of a PDB file.
273  It returns TRUE or FALSE to indicate whether valid information was
274  found. Resolution-not-applicable structures then have the resolution
275  set to zero.
276 
277 - 12.12.11 Original By: ACRM
278  New implementation for remediated PDB files.
279  The old version is available as GetExptlOld() which handles
280  old format files.
281  NOTE If multiple methods specified in EXPDTA record, only the
282  first is used
283  If multiple R-factors are provided in different sections,
284  then the first one is returned.
285 - 07.07.14 Use bl prefix for functions By: CTP
286 - 02.03.15 Moved the actual work into ReadData() By: ACRM
287  Renamed from blGetExptl()
288 */
289 BOOL blGetExptlPDB(FILE *fp, REAL *resolution, REAL *RFactor, REAL *FreeR,
290  int *StrucType)
291 {
292  char buffer[MAXBUFF];
293 
294  /* Set some defaults */
295  *resolution = (REAL)0.0;
296  *RFactor = (REAL)0.0;
297  *FreeR = (REAL)0.0;
298  *StrucType = STRUCTURE_TYPE_UNKNOWN;
299 
300 
301  /* Make sure we're at the start of the PDB file */
302  rewind(fp);
303 
304  /* Get lines from the PDB file */
305  while(fgets(buffer,MAXBUFF,fp))
306  {
307  TERMINATE(buffer);
308  buffer[72] = '\0';
309 
310  /* Break out of the loop as soon as we hit an ATOM record */
311  if(!strncmp(buffer,"ATOM ",6))
312  break;
313 
314  ReadData(buffer, resolution, RFactor, FreeR, StrucType);
315  } /* End of loop through PDB file */
316 
317  /* Return successfully; the output data are already stored in the
318  appropriate places
319  */
320  return ((*resolution > 0.0) ||
321  ( *StrucType != STRUCTURE_TYPE_UNKNOWN ) );
322 }
323 
324 /************************************************************************/
325 /*>BOOL blGetExptlWholePDB(WHOLEPDB *wpdb, REAL *resolution,
326  REAL *RFactor, REAL *FreeR, int *StrucType)
327  -------------------------------------------------------------------
328 *//**
329 
330  \param[in] *wpdb WHOLEPDB structure pointer
331  \param[out] *resolution The resolution (0.0 if not applicable)
332  \param[out] *RFactor The R-factor (0.0 if not found)
333  \param[out] *FreeR The Free R-factor (0.0 if not found)
334  \param[out] *StrucType Structure type:
335  STRUCTURE_TYPE_XTAL
336  STRUCTURE_TYPE_NMR
337  STRUCTURE_TYPE_MODEL
338  STRUCTURE_TYPE_UNKNOWN
339  \return TRUE if resolution found or valid
340  structure type found
341 
342  This routine attempts to obtain resolution and R-factor information
343  out of a PDB file.
344  It returns TRUE or FALSE to indicate whether valid information was
345  found. Resolution-not-applicable structures then have the resolution
346  set to zero.
347 
348 - 02.03.15 Original based on blGetExptlPDB() By: ACRM
349 */
350 BOOL blGetExptlWholePDB(WHOLEPDB *wpdb, REAL *resolution, REAL *RFactor,
351  REAL *FreeR, int *StrucType)
352 {
353  char buffer[MAXBUFF];
354  STRINGLIST *s;
355 
356  /* Set some defaults */
357  *resolution = (REAL)0.0;
358  *RFactor = (REAL)0.0;
359  *FreeR = (REAL)0.0;
360  *StrucType = STRUCTURE_TYPE_UNKNOWN;
361 
362  /* Get lines from the PDB file */
363  for(s=wpdb->header; s!=NULL; NEXT(s))
364  {
365  strncpy(buffer, s->string, MAXBUFF);
366 
367  TERMINATE(buffer);
368  buffer[72] = '\0';
369 
370  ReadData(buffer, resolution, RFactor, FreeR, StrucType);
371  } /* End of loop through PDB file */
372 
373 
374  /* Return successfully; the output data are already stored in the
375  appropriate places
376  */
377  return ((*resolution > 0.0) ||
378  ( *StrucType != STRUCTURE_TYPE_UNKNOWN ) );
379 }
380 
381 
382 /************************************************************************/
383 /*>char *blReportStructureType(int StrucType)
384  ------------------------------------------
385 *//**
386 
387  \param[in] StrucType Structure type returned by blGetResolPDB() and related routines
388  \return Structure type description.
389 
390  Returns structure description.
391 
392 
393  STRUCTURE_TYPE_UNKNOWN Unknown
394  STRUCTURE_TYPE_XTAL X-ray crystal structure
395  STRUCTURE_TYPE_NMR NMR
396  STRUCTURE_TYPE_MODEL Model
397  STRUCTURE_TYPE_ELECTDIFF Electron Diffraction
398  STRUCTURE_TYPE_FIBER Fiber Diffraction
399  STRUCTURE_TYPE_SSNMR Solid State NMR
400  STRUCTURE_TYPE_NEUTRON Neutron Scattering
401  STRUCTURE_TYPE_EM Electron Miscroscopy
402  STRUCTURE_TYPE_SOLSCAT Solution Scattering
403  STRUCTURE_TYPE_IR Infra-red Spectroscopy
404  STRUCTURE_TYPE_POWDER Powder Diffraction
405  STRUCTURE_TYPE_FRET Fluorescence Transfer
406 
407 
408 - 07.07.14 Use bl prefix for functions By: CTP
409 */
410 char *blReportStructureType(int StrucType)
411 {
412  switch(StrucType)
413  {
415  return("Unknown");
416  break;
417  case STRUCTURE_TYPE_XTAL:
418  return("X-ray crystal structure");
419  break;
420  case STRUCTURE_TYPE_NMR:
421  return("NMR");
422  break;
424  return("Model");
425  break;
427  return("Electron Diffraction");
428  break;
430  return("Fiber Diffraction");
431  break;
433  return("Solid State NMR");
434  break;
436  return("Neutron Scattering");
437  break;
438  case STRUCTURE_TYPE_EM:
439  return("Electron Miscroscopy");
440  break;
442  return("Solution Scattering");
443  break;
444  case STRUCTURE_TYPE_IR:
445  return("Infra-red Spectroscopy");
446  break;
448  return("Powder Diffraction");
449  break;
450  case STRUCTURE_TYPE_FRET:
451  return("Fluorescence Transfer");
452  break;
453  default:
454  return("Unknown");
455  break;
456  }
457  return("");
458 }
459 
460 
461 /************************************************************************/
462 static REAL GetNumberAfterColon(char *ptr)
463 {
464  char *colon;
465  REAL val = 0.0;
466 
467  if((colon = strchr(ptr, ':'))!=NULL)
468  {
469  colon++;
470  sscanf(colon, "%lf", &val);
471  }
472 
473  return(val);
474 }
475 
476 
477 /************************************************************************/
478 static BOOL HasText(char *ptr, char *hasWords, char *notWords)
479 {
480  char *p = ptr,
481  *h = hasWords,
482  *n = notWords,
483  word1[80],
484  word2[80];
485  int nRequired = 0,
486  nFound = 0;
487 
488 
489  /* Step through the words we must have */
490  while((h=blGetWord(h, word1, 80))!=NULL)
491  {
492  nRequired++;
493  /* Step through the words in our string */
494  p = ptr;
495  while((p=blGetWord(p, word2, 80))!=NULL)
496  {
497  if(!strcmp(word1, word2))
498  {
499  nFound++;
500  break;
501  }
502  }
503  }
504 
505  /* If we didn't find all the words return false */
506  if(nFound != nRequired)
507  {
508  return(FALSE);
509  }
510 
511  /* We found all the words we must have. Are
512  there any we must NOT have?
513  Step through the words we must not have
514  */
515  while((n=blGetWord(n, word1, 80))!=NULL)
516  {
517  /* Step through the words in our string */
518  p = ptr;
519  while((p=blGetWord(p, word2, 80))!=NULL)
520  {
521  /* Return false if we have a match */
522  if(!strcmp(word1, word2))
523  {
524  return(FALSE);
525  }
526  }
527  }
528 
529  return(TRUE);
530 }
531 
532 /************************************************************************/
533 static int SetStrucType(char *ptr)
534 {
535  if(strstr(ptr, "DIFFRACTION"))
536  {
537  if(strstr(ptr, "X-RAY"))
538  {
539  return(STRUCTURE_TYPE_XTAL);
540  }
541  else if(strstr(ptr, "FIBER"))
542  {
543  return(STRUCTURE_TYPE_FIBER);
544  }
545  else if(strstr(ptr, "NEUTRON"))
546  {
547  return(STRUCTURE_TYPE_NEUTRON);
548  }
549  else if(strstr(ptr, "POWDER"))
550  {
551  return(STRUCTURE_TYPE_POWDER);
552  }
553  }
554  else if(strstr(ptr, "ELECTRON"))
555  {
556  if(strstr(ptr, "CRYSTALLOGRAPHY"))
557  {
558  return(STRUCTURE_TYPE_ELECTDIFF);
559  }
560  else if(strstr(ptr, "MICROSCOPY"))
561  {
562  return(STRUCTURE_TYPE_EM);
563  }
564  }
565  else if(strstr(ptr, "SOLUTION"))
566  {
567  if(strstr(ptr, "NMR"))
568  {
569  return(STRUCTURE_TYPE_NMR);
570  }
571  else if(strstr(ptr, "SCATTERING"))
572  {
573  return(STRUCTURE_TYPE_SOLSCAT);
574  }
575  }
576  else if(strstr(ptr, "SOLID-STATE"))
577  {
578  if(strstr(ptr, "NMR"))
579  {
580  return(STRUCTURE_TYPE_SSNMR);
581  }
582  }
583  else if(strstr(ptr, "SPECTROSCOPY"))
584  {
585  if(strstr(ptr, "INFRARED"))
586  {
587  return(STRUCTURE_TYPE_IR);
588  }
589  }
590  else if(strstr(ptr, "FLUORESCENCE"))
591  {
592  if(strstr(ptr, "TRANSFER"))
593  {
594  return(STRUCTURE_TYPE_FRET);
595  }
596  }
597 
598  return(STRUCTURE_TYPE_UNKNOWN);
599 }
600 
601 
602 /************************************************************************/
603 /*>BOOL blGetExptlOld(FILE *fp, REAL *resolution, REAL *RFactor,
604  REAL *FreeR, int *StrucType)
605  -------------------------------------------------------------
606 *//**
607 
608  \param[in] *fp PDB file pointer
609  \param[out] *resolution The resolution (0.0 if not applicable)
610  \param[out] *RFactor The R-factor (0.0 if not found)
611  \param[out] *FreeR The Free R-factor (0.0 if not found)
612  \param[out] *StrucType Structure type:
613  STRUCTURE_TYPE_XTAL
614  STRUCTURE_TYPE_NMR
615  STRUCTURE_TYPE_MODEL
616  STRUCTURE_TYPE_UNKNOWN
617  \return TRUE if resolution found (even if not
618  applicable)
619 
620  This routine attempts to obtain resolution and R-factor information
621  out of a PDB file.
622  It returns TRUE or FALSE to indicate whether valid information was
623  found. Resolution-not-applicable structures then have the resolution
624  set to zero.
625 
626  N.B.
627  The resolution information returned by the routine is reliable; the
628  R-factor information is stored in so many forms that it is
629  difficult to read without some form of natural language parsing, but
630  we manage to handle most situations.
631  The routine assumes the R-factor to be the first number after the
632  words `R-value' (or one of the other keys - see the case statement in
633  the code for the valid keywords). Thus we cannot handle records of the
634  form:
635  THE R-VALUE FOR 7142 REFLECTIONS BETWEEN 10.0 AND 1.97 ANGSTROMS
636  REFINEMENT CYCLE 73 IS 0.254.
637  as appears in entries such as 1LZT. Here, the first number is
638  the number of reflections. There is thus a kludge which sets the
639  R-factor to zero if it was read as greater than 0.5 to avoid
640  this situation. In these cases, we lose the R-factor information.
641  This occurs in approx 3.5% of the 1XXX PDB entries.
642 
643 - 25.02.94 Original By: ACRM
644 - 28.02.94 Added " R = " and check that VALUE wasn't B-VALUE
645 - 17.07.96 Added check for EXPERIMENT TYPE : THEORETICAL MODEL
646  Also fixed bug in searching REMARK record
647 - 27.06.97 Added handling of RESOLUTION records which point you to
648  another record for the experiment type.
649  Fixed some calls to FindNextNumber() which were checking
650  an 80 character width
651  Fixed EXPERIMENT TYPE to look for NMR as well
652  Looks for EXPDTA NMR record
653 - 23.03.98 Added check that RESOLUTION record is in a REMARK 2
654 - 08.03.99 Renamed to GetExptl() from GetResolPDB() and added
655  FreeR parameter. GetResolPDB() is now a wrapper to this
656  routine.
657  Added additional pass which looks for the structured
658  REMARK 3 records
659 - 28.04.99 Initialise FindRefRecord et al. to zero
660 - 18.06.99 Added other strings to the valid structured block for pass 0
661  Added check for -ve R-factor
662 - 08.09.99 Now takes the first FREE R-factor followed by 17 spaces
663  rather than the last
664 - 07.07.14 Use bl prefix for functions By: CTP
665 */
666 BOOL blGetExptlOld(FILE *fp, REAL *resolution, REAL *RFactor, REAL *FreeR,
667  int *StrucType)
668 {
669  BOOL ResNotApplic = FALSE, /* Found resolution not applicable */
670  HaveResol = FALSE, /* Found resolution data */
671  HaveRFac = FALSE, /* Found R-factor */
672  HaveFreeR = FALSE, /* Found free R-factor */
673  InAllDataBlock = FALSE; /* Start of structured REMARK3 block */
674 
675  char *ptr,
676  buffer[MAXBUFF];
677  int PassNumber;
678 
679  /* Set some defaults */
680  *resolution = (REAL)0.0;
681  *RFactor = (REAL)0.0;
682  *FreeR = (REAL)0.0;
683  *StrucType = STRUCTURE_TYPE_XTAL;
684 
685  /* We allow a series of passes of the PDB file to get the R-value
686  information. This is described in so many different ways that we
687  make checks on the most likely forms in the first pass; if that
688  fails, we make subsequent passes using less likely (and less
689  definite) options.
690  */
691  for(PassNumber = 0; PassNumber<5; PassNumber++)
692  {
693 
694  /* Make sure we're at the start of the PDB file */
695  rewind(fp);
696 
697  /* Get lines from the PDB file */
698  while(fgets(buffer,MAXBUFF,fp))
699  {
700  TERMINATE(buffer);
701  buffer[72] = '\0';
702 
703  /* Break out of the loop as soon as we hit an ATOM record */
704  if(!strncmp(buffer,"ATOM ",6))
705  break;
706 
707  /* See if we've found a REMARK record */
708  if(!strncmp(buffer,"REMARK",6))
709  {
710  /* If we haven't got it already, see if this record contains
711  the Resolution information
712  */
713  if(!HaveResol)
714  {
715  /* Test for a RESOLUTION sub-record not in a TITL */
716  if(((ptr = strstr(buffer,"RESOL"))!=NULL) &&
717  (strstr(buffer,"TITL ") == NULL) &&
718  (strstr(buffer,"REMARK 2")))
719  {
720  HaveResol = TRUE;
721  /* If we find the word NOT, then resolution is not
722  applicable
723  */
724  if(strstr(ptr,"NOT"))
725  {
726  ResNotApplic = TRUE;
727  break; /* Out of search through PDB */
728  }
729  else /* Look for the actual resolution value */
730  {
731  if(!FindNextNumber(ptr,fp,1,10,62,resolution))
732  {
733  HaveResol = FALSE;
734  }
735  }
736  }
737  }
738 
739  /* If we've got the resolution and it is a real value,
740  then start looking for the R-factor information
741  */
742  if(HaveResol && !ResNotApplic)
743  {
744  switch(PassNumber)
745  {
746  case 0:
747  /* 08.03.99 This pass looks for structured REMARK 3
748  records
749  */
750  if(!strncmp(buffer,"REMARK 3",10))
751  {
752  if(strstr(buffer,
753  "FIT/AGREEMENT OF MODEL WITH ALL DATA") ||
754  strstr(buffer,
755  "DATA USED IN REFINEMENT"))
756  {
757  InAllDataBlock = TRUE;
758  }
759  else if(InAllDataBlock)
760  {
761  if((ptr=strstr(buffer,
762  "R VALUE (WORKING + TEST"))
763  != NULL)
764  {
765  if(FindNextNumber(ptr,fp,0,50,65,RFactor))
766  HaveRFac = TRUE;
767  else
768  *RFactor = (REAL)0.0;
769  }
770  else if(!HaveRFac &&
771  ((ptr=strstr(buffer,
772  "R VALUE (WORKING"))
773  != NULL))
774  {
775  if(FindNextNumber(ptr,fp,0,50,65,RFactor))
776  HaveRFac = TRUE;
777  else
778  *RFactor = (REAL)0.0;
779  }
780 
781  /* 06.09.99 Added check on HaveFreeR */
782  if(!HaveFreeR &&
783  (ptr=strstr(buffer,
784  "FREE R VALUE "))
785  != NULL)
786  {
787  if(FindNextNumber(ptr,fp,0,50,65,FreeR))
788  HaveFreeR = TRUE;
789  else
790  *FreeR = (REAL)0.0;
791  }
792 
793  if(strstr(buffer, "NUMBER OF NON-HYDROGEN")!=NULL)
794  InAllDataBlock = FALSE;
795  }
796  }
797  break;
798  case 1:
799  /* This pass we look for R and FACTOR/VALUE on the
800  same line with a space before the R
801  */
802  if((ptr=strstr(buffer," R-FAC")) != NULL ||
803  (ptr=strstr(buffer," R FAC")) != NULL ||
804  (ptr=strstr(buffer," R =")) != NULL ||
805  (ptr=strstr(buffer," R-VAL")) != NULL ||
806  (ptr=strstr(buffer," R VAL")) != NULL)
807  {
808  if(FindNextNumber(ptr,fp,1,10,62,RFactor))
809  HaveRFac = TRUE;
810  else
811  *RFactor = (REAL)0.0;
812  }
813  break;
814  case 2:
815  /* This pass we look for R and FACTOR/VALUE on the
816  same line with no space before R; beginning of
817  line, we hope.
818  */
819  if((ptr=strstr(buffer,"R-FAC")) != NULL ||
820  (ptr=strstr(buffer,"R FAC")) != NULL ||
821  (ptr=strstr(buffer,"R-VAL")) != NULL ||
822  (ptr=strstr(buffer,"R VAL")) != NULL)
823  {
824  if(FindNextNumber(ptr,fp,1,10,62,RFactor))
825  HaveRFac = TRUE;
826  else
827  *RFactor = (REAL)0.0;
828  }
829  break;
830  case 3:
831  /* This pass, we look just for the word VALUE */
832  if((ptr=strstr(buffer,"VALUE")) != NULL)
833  {
834  /* Having found VALUE, we must check that it wasn't
835  B-VALUE. Just checking the previous character
836  wasn't a - is sufficient
837  */
838  if((ptr > buffer) && (*(ptr-1) != '-'))
839  {
840  if(FindNextNumber(ptr,fp,1,10,62,RFactor))
841  HaveRFac = TRUE;
842  else
843  *RFactor = (REAL)0.0;
844  }
845  }
846  break;
847  case 4:
848  /* This pass, we look just for the word FACTOR */
849  if((ptr=strstr(buffer,"FACTOR")) != NULL)
850  {
851  if(FindNextNumber(ptr,fp,1,10,62,RFactor))
852  HaveRFac = TRUE;
853  else
854  *RFactor = (REAL)0.0;
855  }
856  break;
857  } /* End of switch */
858 
859  /* If we've got an R-factor, but it's larger than 0.5,
860  then we've miss-read it. This is most likely to
861  result from the condition described in the note in
862  the header of this routine or from an error in what
863  we find in cases 2 & 3. We can't handle this
864  situation, so set value to zero.
865 
866  18.06.99 Also check for it being negative
867  */
868  if(*RFactor > (REAL)0.5 || *RFactor < (REAL)0.0)
869  *RFactor = (REAL)0.0;
870 
871  /* If we've got the R-factor, break out of the search
872  through the PDB file.
873  */
874  if(InAllDataBlock)
875  {
876  if(HaveRFac && HaveFreeR)
877  break;
878  }
879  else
880  {
881  if(HaveRFac)
882  break;
883  }
884  } /* End of Is resolution applicable? */
885  } /* End of test for it being a REMARK line */
886  } /* End of loop through PDB file */
887 
888  /* After first pass, if we don't have the resolution information
889  then we give up; there seems to be no interpretable information
890  in the file. Output data are already set.
891  */
892  /* MJP 5.i.00
893  Move this to end; some XRay PDBs (e.g., 2rec) don't have a
894  remark 2 record. This is not legal PDB, but it exists, so we
895  should parse it.
896  */
897  /* if(!HaveResol)
898  return(FALSE); */
899 
900  /* Don't bother with another pass if we've found that the
901  RESOLUTION record says NOT APPLICABLE, or we've already got
902  the R-factor information.
903  */
904  if(ResNotApplic || HaveRFac)
905  break;
906  } /* End of for() each pass */
907 
908  /* If it's resolution not applicable see if it's NMR or MODEL */
909  if ( ResNotApplic || !HaveResol )
910  {
911  REAL FindRecord = 0.0,
912  FindRefRecord = 0.0,
913  ThisRecord = 0.0;
914 
915  *StrucType = STRUCTURE_TYPE_UNKNOWN;
916 
917  /* Search through the same REMARK as the RESOLUTION for "MODEL",
918  "NON_EXP" or "NMR" to see if it's NMR or MODEL. Also search
919  all remark records for EXPERIMENT TYPE :
920  */
921  /* Find the REMARK number from the buffer
922  17.07.96 Corrected ptr to buffer
923  */
924  if(FindNextNumber(buffer, fp, 0, 0, 71, &FindRecord))
925  {
926  /* 27.06.97 ACRM. Also find any referenced record */
927  if(strstr(buffer+10,"REMARK"))
928  {
929  if(!FindNextNumber(buffer+10, fp, 0, 10, 71, &FindRefRecord))
930  FindRefRecord = (-1);
931  }
932 
933  rewind(fp);
934  while(fgets(buffer,MAXBUFF,fp))
935  {
936  if(!strncmp(buffer,"ATOM ",6)) break;
937  if(!strncmp(buffer,"EXPDTA",6)) /* 27.06.97, ACRM */
938  {
939  if(strstr(buffer,"NMR"))
940  {
941  *StrucType = STRUCTURE_TYPE_NMR;
942  break;
943  }
944  /* 05.i.00 MJP */
945  else if (strstr (buffer,"ELECTRON MICROSCOPY"))
946  {
947  *StrucType = STRUCTURE_TYPE_ELECTDIFF;
948  break;
949  }
950  }
951 
952  if(!strncmp(buffer,"REMARK",6))
953  {
954  if(FindNextNumber(buffer, fp, 0, 0, 71, &ThisRecord))
955  {
956  if(((int)FindRecord == (int)ThisRecord) ||
957  ((int)FindRefRecord == (int)ThisRecord))
958  {
959  if(strstr(buffer,"MODEL")||
960  strstr(buffer,"NON-EXP"))
961  {
962  *StrucType = STRUCTURE_TYPE_MODEL;
963  break;
964  }
965  if(strstr(buffer,"NMR"))
966  {
967  *StrucType = STRUCTURE_TYPE_NMR;
968  break;
969  }
970  } /* In the correct REMARK block */
971  } /* Found a number from this REMARK record */
972 
973  /* We also look for EXPERIMENT TYPE : MODEL / NMR
974  */
975  if(strstr(buffer,"EXPERIMENT"))
976  {
977  if(strstr(buffer,"MODEL"))
978  {
979  *StrucType = STRUCTURE_TYPE_MODEL;
980  break;
981  }
982  else if(strstr(buffer,"NMR"))
983  {
984  *StrucType = STRUCTURE_TYPE_NMR;
985  break;
986  }
987  else if(strstr(buffer,"X-RAY"))
988  {
989  *StrucType = STRUCTURE_TYPE_XTAL;
990  break;
991  }
992  }
993  } /* This is a REMARK record */
994  } /* while() search through PDB file */
995  } /* Found number in the RESOLUTION record to point to REMARK */
996  } /* Resolution not applicable */
997 
998 
999  /* Return successfully; the output data are already stored in the
1000  appropriate places
1001  */
1002  return (HaveResol || ( *StrucType != STRUCTURE_TYPE_UNKNOWN ) );
1003 }
1004 
1005 /************************************************************************/
1006 /*>static BOOL FindNextNumber(char *buffer, FILE *fp, int nlines,
1007  int nskip, int ncheck, REAL *value)
1008  --------------------------------------------------------------
1009 *//**
1010 
1011  Find the next number which occurs in the file within nlines lines.
1012  First looks through the character buffer given to the routine. If this
1013  fails looks at the next lines in the file itself. Extra lines are
1014  scanned character-wise, so the whole line may not be taken out of the
1015  input stream. One may choose to ignore the first nskip characters from
1016  each new line which is read from the file. This does *not* apply to
1017  the initial character buffer.
1018 
1019  \param[in] *buffer Pointer to buffer to search first
1020  \param[in] *fp Pointer to file to find additional lines in
1021  \param[in] nlines Number of additional lines to scan
1022  \param[in] nskip Skip this many characters at the start of
1023  each new line
1024  \param[in] ncheck Check only this many characters from each new
1025  line
1026  \param[out] *value The value which we find
1027  \return Success/failure.
1028 
1029 - 25.02.94 Original By: ACRM
1030 - 28.02.94 Terminates at trailing decimal place
1031 - 27.06.97 Fixed potential bug when number of lines to read ahead set
1032  to 0 was still reading characters
1033 - 07.09.99 Added check for i > 0 in FindNextNumber when examining
1034  valbuff[i-1] By: MJP
1035 */
1036 static BOOL FindNextNumber(char *buffer, FILE *fp, int nlines, int nskip,
1037  int ncheck, REAL *value)
1038 {
1039  char *ptr, /* Used to step through the buffers */
1040  valbuff[80]; /* Buffer for copying a value into */
1041  int i,j, /* Counter */
1042  linecount, /* Count lines read */
1043  chcount, /* Count characters read from file */
1044  ch; /* Character read from file */
1045 
1046  linecount = 0;
1047  i = 0;
1048  valbuff[i] = '\0';
1049 
1050  for(ptr=buffer; *ptr; ptr++)
1051  {
1052  /* If we find a space, test the assembled string */
1053  if(*ptr == ' ' || *ptr == '\t')
1054  {
1055  valbuff[i] = '\0';
1056  if ( ( i > 0 ) &&
1057  ( valbuff[i-1] == '.' || valbuff[i-1] == ',' ) )
1058  valbuff[i-1] = '\0';
1059 
1060  if((sscanf(valbuff,"%lf",value)) == 1)
1061  {
1062  return(TRUE);
1063  }
1064 
1065 
1066  /* If we get here, it wasn't a valid number, so reset */
1067  i=0;
1068  continue; /* Don't bother to copy in the space */
1069  }
1070 
1071  /* Copy in the character */
1072  valbuff[i++] = *ptr;
1073  }
1074 
1075  /* Our string has run out, so do the same thing, but getting
1076  characters from the file instead
1077  */
1078  if(!nlines)
1079  return(FALSE); /* ACRM 27.06.97 */
1080 
1081  for(j=0; j<nskip; j++)
1082  fgetc(fp);
1083  i = 0;
1084  chcount = 0;
1085  while(((ch = (int)fgetc(fp)) != EOF) && (linecount < nlines))
1086  {
1087  /* See if we've checked enough characters. If so, skip over all
1088  characters up the the next newline
1089  */
1090  if(++chcount == ncheck)
1091  {
1092  while((ch = (int)fgetc(fp)) != EOF && ch != '\n');
1093  if(ch == EOF) break;
1094  }
1095 
1096  /* If we've got a newline (including from the above skipping
1097  process), then reset out counters and skip over the nskip
1098  characters at the start of this line
1099  */
1100  if(ch == '\n')
1101  {
1102  for(j=0; j<nskip; j++)
1103  fgetc(fp);
1104 
1105  chcount = 0;
1106  linecount++;
1107  }
1108 
1109 
1110  if(ch == ' ' || ch == '\t' || ch == '\n')
1111  {
1112  valbuff[i] = '\0';
1113 
1114  if((sscanf(valbuff,"%lf",value)) == 1)
1115  {
1116  return(TRUE);
1117  }
1118 
1119  /* If we get here, it wasn't a valid number, so reset */
1120  i=0;
1121  continue; /* Don't bother to copy in the space */
1122  }
1123 
1124  /* Copy in the character */
1125  valbuff[i++] = ch;
1126  }
1127 
1128  /* If we get here, we failed to find a number */
1129  return(FALSE);
1130 }
1131 
1132 
1133 /************************************************************************/
1134 /*>static void ReadData(char *buffer, REAL *resolution, REAL *RFactor,
1135  REAL *FreeR, int *StrucType)
1136  -------------------------------------------------------------------
1137 *//**
1138  \param[in] buffer Contents of a line from a PDB file
1139  \param[out] resolution Resolution data if found
1140  \param[out] RFactor R-factor data if found
1141  \param[out] FreeR Free R data if found
1142  \param[out] StrucType Structure type data if found
1143 
1144 - 02.03.15 Original extracted from blGetExptlPDB()
1145 */
1146 static void ReadData(char *buffer, REAL *resolution, REAL *RFactor,
1147  REAL *FreeR, int *StrucType)
1148 {
1149  char *ptr = NULL;
1150 
1151  /* See if we've found a REMARK record */
1152  if(!strncmp(buffer,"REMARK",6))
1153  {
1154 
1155  char word[80];
1156  int remarkType = 0;
1157 
1158  /* See which REMARK type it is */
1159  ptr = blGetWord(buffer+6, word, 80);
1160  if(sscanf(word, "%d", &remarkType))
1161  {
1162  switch(remarkType)
1163  {
1164  case 2:
1165  if(*resolution == 0.0)
1166  {
1167  ptr = blGetWord(ptr, word, 80);
1168  if(!strncmp(word, "RESOLUTION", 10))
1169  {
1170  ptr = blGetWord(ptr, word, 80);
1171  if(!sscanf(word, "%lf", resolution))
1172  {
1173  *resolution = 0.0;
1174  }
1175  }
1176  }
1177  break;
1178  case 3:
1179  if(*RFactor == 0.0)
1180  {
1181  if(HasText(ptr, "R VALUE WORKING", "FREE"))
1182  {
1183  *RFactor = GetNumberAfterColon(ptr);
1184  }
1185  }
1186 
1187  if(*FreeR == 0.0)
1188  {
1189  if(HasText(ptr, "FREE R VALUE", "TEST ERROR"))
1190  {
1191  *FreeR = GetNumberAfterColon(ptr);
1192  }
1193  }
1194 
1195  break;
1196  case 200:
1197  /* If we didn't get the structure type from EXPDTA then
1198  try here
1199  */
1200  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1201  {
1202  if(HasText(ptr, "EXPERIMENT TYPE", NULL))
1203  {
1204  char *colon;
1205  if((colon = strchr(ptr, ':'))!=NULL)
1206  {
1207  colon++;
1208  *StrucType = SetStrucType(colon);
1209  }
1210  }
1211  }
1212  break;
1213  case 205:
1214  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1215  {
1216  *StrucType = STRUCTURE_TYPE_FIBER;
1217  }
1218  break;
1219  case 215:
1220  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1221  {
1222  *StrucType = STRUCTURE_TYPE_NMR;
1223  }
1224  break;
1225  case 217:
1226  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1227  {
1228  *StrucType = STRUCTURE_TYPE_SSNMR;
1229  }
1230  break;
1231  case 230:
1232  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1233  {
1234  *StrucType = STRUCTURE_TYPE_NEUTRON;
1235  }
1236  break;
1237  case 240:
1238  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1239  {
1240  *StrucType = STRUCTURE_TYPE_ELECTDIFF;
1241  }
1242  break;
1243  case 245:
1244  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1245  {
1246  *StrucType = STRUCTURE_TYPE_EM;
1247  }
1248  break;
1249  case 247:
1250  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1251  {
1252  *StrucType = STRUCTURE_TYPE_EM;
1253  }
1254  break;
1255  case 265:
1256  if(*StrucType == STRUCTURE_TYPE_UNKNOWN)
1257  {
1258  *StrucType = STRUCTURE_TYPE_SOLSCAT;
1259  }
1260  break;
1261  }
1262 
1263  }
1264  } /* End of test for it being a REMARK line */
1265  else if(!strncmp(buffer,"EXPDTA", 6))
1266  {
1267  char *semiColon;
1268 
1269  ptr = buffer+10;
1270 
1271  /* Terminate at semi-colon */
1272  if((semiColon=strchr(ptr,';'))!=NULL)
1273  {
1274  *semiColon = '\0';
1275  }
1276 
1277  *StrucType = SetStrucType(ptr);
1278  }
1279 }
1280 
1281 
1282 /************************************************************************/
#ifdef DEMO
/* Demonstration driver, only built when DEMO is defined.
   Reads the PDB file named as the first command line argument and prints
   the resolution, R-factor, free R-factor and structure type found.
   Returns 0 on success and 1 if no file was given, the file could not
   be opened or no experimental information was found.
*/
int main(int argc, char **argv)
{
   FILE *fp;
   REAL resol     = 0.0,
        RFactor   = 0.0,
        FreeR     = 0.0;
   int  StrucType = STRUCTURE_TYPE_UNKNOWN;
   BOOL found;

   if(argc < 2)
   {
      fprintf(stderr, "Usage: %s file.pdb\n",
              (argc > 0) ? argv[0] : "ResolPDB");
      return(1);
   }

   if((fp = fopen(argv[1],"r")) == NULL)
   {
      fprintf(stderr, "Unable to open file: %s\n", argv[1]);
      return(1);
   }

   found = blGetExptlPDB(fp, &resol, &RFactor, &FreeR, &StrucType);
   fclose(fp);

   if(!found)
   {
      fprintf(stderr, "No experimental information found in: %s\n",
              argv[1]);
      return(1);
   }

   printf("PDB: %s\n",argv[1]);
   printf("Resol: %f\n",resol);
   printf("RFactor: %f\n",RFactor);
   printf("Free R: %f\n",FreeR);
   printf("StrucType: %s\n", blReportStructureType(StrucType));

   return(0);
}
#endif
1302 
1303 
1304 
1305 
1306 
#define STRUCTURE_TYPE_MODEL
Definition: pdb.h:559
#define STRUCTURE_TYPE_NMR
Definition: pdb.h:558
BOOL blGetResolWholePDB(WHOLEPDB *wpdb, REAL *resolution, REAL *RFactor, int *StrucType)
Definition: ResolPDB.c:244
int main(int argc, char **argv)
Definition: test.c:4
Include file for PDB routines.
short BOOL
Definition: SysDefs.h:64
#define NULL
Definition: array2.c:99
#define STRUCTURE_TYPE_SSNMR
Definition: pdb.h:562
BOOL blGetExptlOld(FILE *fp, REAL *resolution, REAL *RFactor, REAL *FreeR, int *StrucType)
Definition: ResolPDB.c:666
STRINGLIST * header
Definition: pdb.h:375
#define FALSE
Definition: macros.h:223
Definition: pdb.h:372
#define NEXT(x)
Definition: macros.h:249
BOOL blGetExptlPDB(FILE *fp, REAL *resolution, REAL *RFactor, REAL *FreeR, int *StrucType)
Definition: ResolPDB.c:289
Useful macros.
#define STRUCTURE_TYPE_XTAL
Definition: pdb.h:557
#define TERMINATE(x)
Definition: macros.h:366
#define STRUCTURE_TYPE_EM
Definition: pdb.h:564
#define STRUCTURE_TYPE_SOLSCAT
Definition: pdb.h:565
double REAL
Definition: MathType.h:67
char * blReportStructureType(int StrucType)
Definition: ResolPDB.c:410
#define STRUCTURE_TYPE_UNKNOWN
Definition: pdb.h:556
#define STRUCTURE_TYPE_POWDER
Definition: pdb.h:567
BOOL blGetResolPDB(FILE *fp, REAL *resolution, REAL *RFactor, int *StrucType)
Definition: ResolPDB.c:193
#define STRUCTURE_TYPE_NEUTRON
Definition: pdb.h:563
#define TRUE
Definition: macros.h:219
char * blGetWord(char *buffer, char *word, int maxsize)
Definition: GetWord.c:268
#define MAXBUFF
Definition: ResolPDB.c:121
#define STRUCTURE_TYPE_FRET
Definition: pdb.h:568
char * string
Definition: general.h:85
BOOL blGetExptlWholePDB(WHOLEPDB *wpdb, REAL *resolution, REAL *RFactor, REAL *FreeR, int *StrucType)
Definition: ResolPDB.c:350
#define STRUCTURE_TYPE_ELECTDIFF
Definition: pdb.h:560
#define STRUCTURE_TYPE_IR
Definition: pdb.h:566
#define STRUCTURE_TYPE_FIBER
Definition: pdb.h:561