The outage for Sunday 24th November has been cancelled.
Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
PDBHeaderInfo.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file PDBHeaderInfo.c
5 
6  \version V1.8
7  \date 03.10.16
8 
9  \brief Get misc header info from PDB header
10 
11  \copyright (c) Dr. Andrew C.R. Martin / UCL, 2015-2016
12  \author Dr. Andrew C. R. Martin
13  \par
14  Institute of Structural & Molecular Biology,
15  University College London,
16  Gower Street,
17  London.
18  WC1E 6BT.
19  \par
20  andrew@bioinf.org.uk
21  andrew.martin@ucl.ac.uk
22 
23 **************************************************************************
24 
25  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
26  according to the conditions laid out in the accompanying file
27  COPYING.DOC.
28 
29  The code may be modified as required, but any modifications must be
30  documented so that the person responsible can be identified.
31 
32  The code may not be sold commercially or included as part of a
33  commercial product except as described in the file COPYING.DOC.
34 
35 **************************************************************************
36 
37  Description:
38  ============
39 
40 
41 **************************************************************************
42 
43  Usage:
44  ======
45 
46  See documentation for details
47 
48 **************************************************************************
49 
50  Revision History:
51  =================
52 - V1.0 26.03.15 Original
53 - V1.1 13.05.15 blGetTitleWholePDB() returns NULL if no title line in
54  header. Fix for blGetSpeciesWholePDBChain()
55  Added blGetCompoundWholePDBMolID() and
56  blGetSpeciesWholePDBMolID() By: CTP
57 - V1.2 04.06.15 Fixed bug in dealing with compounds where the referenced
58  chains span more than one line
59 - V1.3 09.06.15 Merged changes from CTP and ACRM.
60  Updated blGetTitleWholePDB() By: CTP
61 - V1.4 11.06.15 Added blGetSeqresAsStringWholePDB(),
62  blGetModresWholePDB() and blFindOriginalResType()
63  By: ACRM
64 - V1.5 26.06.15 Added blGetBiomoleculeWholePDB() and blFreeBiomolecule()
65 - V1.6 25.11.15 Added blGetSeqresByChainWholePDB()
66 - V1.7 02.12.15 Sequences were not being terminated properly in
67  blGetSeqresByChainWholePDB()
68 - V1.8 03.10.16 Added <stdlib.h>
69 
70 *************************************************************************/
71 /* Doxygen
72  -------
73  #GROUP Handling PDB Data
74  #SUBGROUP File IO
75 
76  #FUNCTION blGetHeaderWholePDB()
77  Obtains the data in the HEADER record from WHOLEPDB info
78 
79  #FUNCTION blGetTitleWholePDB()
80  Obtains the title information from WHOLEPDB info
81 
82  #FUNCTION blGetCompoundWholePDBChain()
83  Obtains the compound data for a specified chain from WHOLEPDB info
84 
85  #FUNCTION blFindMolID()
86  Finds the MOL_ID for a specified chain
87 
88  #FUNCTION blGetSpeciesWholePDBChain()
89  Obtains the species data for a specified chain from WHOLEPDB info
90 
91  #FUNCTION blGetCompoundWholePDBMolID()
92  Obtains the compound data for a specified MOL_ID from WHOLEPDB info
93 
94  #FUNCTION blGetSpeciesWholePDBMolID()
95  Obtains the species data for a specified MOL_ID from WHOLEPDB info
96 
97  #FUNCTION blGetSeqresAsStringWholePDB()
98  Obtain the sequence from the SEQRES records storing it as a single
99  string with *s to separate chains
100 
101  #FUNCTION blGetSeqresByChainWholePDB()
102  Obtain the sequence from the SEQRES records storing it in a hash
103  indexed by chain label
104 
105  #FUNCTION blGetModresWholePDB()
106  Obtain the MODRES data
107 
108  #FUNCTION blFindOriginalResType()
109  Find the original residue type for a modified residue from MODRES
110  data
111 
112  #FUNCTION blGetBiomoleculeWholePDB()
113  Obtain the biomolecule data
114 
115  #FUNCTION blFreeBiomolecule()
116  Free the biomolecule data
117 */
118 
119 /************************************************************************/
120 /* Includes
121 */
122 #include <stdio.h>
123 #include <stdlib.h>
124 #include <string.h>
125 #include "pdb.h"
126 #include "seq.h"
127 #include "macros.h"
128 #include "fsscanf.h"
129 #include "general.h"
130 #include "hash.h"
131 
132 /************************************************************************/
133 /* Defines and macros
134 */
/* Size of the buffer used for one word (chain label) read from a CHAIN:
   field
*/
#define MAXWORD   8
/* Size of the working buffer that holds one SEQRES record              */
#define MAXBUFF   160
/* Initial size of, and growth step for, the SEQRES sequence string     */
#define ALLOCSIZE 80

/* Reset every field of the BIOMOLECULE pointed to by b: pointers become
   NULL, counters become 0 and the two unit strings become empty.
   Nothing is freed, so this is only for newly allocated structures.
*/
#define CLEAR_BIOMOL(b)                                                  \
   do{                                                                   \
      (b)->details         = NULL;                                       \
      (b)->numBiomolecules = 0;                                          \
      (b)->biomolNumber    = 0;                                          \
      (b)->authorUnit[0]   = '\0';                                       \
      (b)->softwareUnit[0] = '\0';                                       \
      (b)->chains          = NULL;                                       \
      (b)->biomt           = NULL;                                       \
   } while(0)
149 
150 /************************************************************************/
151 /* Globals
152 */
153 
154 /************************************************************************/
155 /* Prototypes
156 */
157 static BIOMOLECULE *doRemark300(WHOLEPDB *wpdb);
158 static BIOMOLECULE *doRemark350(WHOLEPDB *wpdb, BIOMOLECULE *biomolecule);
159 
160 /************************************************************************/
161 /*>BOOL blGetHeaderWholePDB(WHOLEPDB *wpdb,
162  char *header, int maxheader,
163  char *date, int maxdate,
164  char *pdbcode, int maxcode)
165  ---------------------------------------------------
166 *//**
167 
168  \param[in] *wpdb WHOLEPDB structure pointer
169  \param[out] *header String containing header text
170  \param[in] maxheader Max length for storing header
171  \param[out] *date Date string
172  \param[in] maxdate Max length for storing date
173  \param[out] *pdbcode PDB code
174  \param[in] maxcode Max length for storing PDB code
175  \return TRUE: Found HEADER
176  FALSE: Didn't find HEADER
177 
178  Obtains information from the PDB HEADER record
179 
180 - 26.03.15 Original By: ACRM
181 */
183  char *header, int maxheader,
184  char *date, int maxdate,
185  char *pdbcode, int maxcode)
186 {
187  STRINGLIST *s;
188  int i;
189  BOOL retval = FALSE;
190 
191  /* Blank all the strings */
192  for(i=0; i<maxheader; i++) header[i] = '\0';
193  for(i=0; i<maxdate; i++) date[i] = '\0';
194  for(i=0; i<maxcode; i++) pdbcode[i] = '\0';
195 
196  for(s=wpdb->header; s!=NULL; NEXT(s))
197  {
198  if(!strncmp(s->string, "HEADER", 6))
199  {
200  retval = TRUE;
201  strncpy(header, s->string+10, MIN(40, maxheader));
202  KILLTRAILSPACES(header);
203  strncpy(date, s->string+50, MIN( 9, maxdate));
204  strncpy(pdbcode, s->string+62, MIN( 4, maxcode));
205  break;
206  }
207  }
208 
209  return(retval);
210 }
211 
212 
213 /************************************************************************/
214 /*>char *blGetTitleWholePDB(WHOLEPDB *wpdb)
215  ----------------------------------------
216 *//**
217  \param[in] *wpdb WHOLEPDB structure
218  \return Title from PDB file (malloc()'d)
219 
220  Extracts the title from a PDB file malloc()ing a string in which to
221  store the data. This must be freed by user code
222 
223 - 28.04.15 Original By: ACRM
224 - 11.05.15 Return NULL if TITLE line absent. By: CTP
225 - 09.06.15 Add columns 11 to 80 to title string for both start and
226  continuation lines. By: CTP
227 */
229 {
230  char *title = NULL,
231  *cleanTitle = NULL;
232  STRINGLIST *s;
233  BOOL inTitle = FALSE;
234 
235  for(s=wpdb->header; s!=NULL; NEXT(s))
236  {
237  if(!strncmp(s->string, "TITLE ", 6))
238  {
239  char buffer[MAXPDBANNOTATION];
240  strcpy(buffer, s->string);
241  TERMINATE(buffer);
242 
243  /* append cols 11-80 to title string */
244  title = blStrcatalloc(title, buffer+10);
245 
246  if(title == NULL)
247  return(NULL);
248  }
249  else if(inTitle)
250  {
251  break;
252  }
253  }
254 
255  /* title line not found */
256  if(title == NULL)
257  return(NULL);
258 
259  cleanTitle = blCollapseSpaces(title);
260  free(title);
261  KILLTRAILSPACES(cleanTitle);
262 
263  return(cleanTitle);
264 }
265 
266 /************************************************************************/
267 /*>static STRINGLIST *FindNextMolIDRecord(STRINGLIST *start, char *type)
268  ---------------------------------------------------------------------
269 *//**
270  \param[in] *start Start of stringlist containing header
271  \param[in] *type Type of header record - COMPND or SOURCE
272  \return Pointer to start of the next molecule ID in
273  the appropriate header records
274 
275  Find the next MOL_ID within the specified header record type (COMPND
276  or SOURCE)
277 
278  28.04.15 Original By: ACRM
279 */
280 static STRINGLIST *FindNextMolIDRecord(STRINGLIST *start, char *type)
281 {
282  STRINGLIST *s;
283 
284  if(start==NULL)
285  return(NULL);
286 
287  for(s=start->next; s!=NULL; NEXT(s))
288  {
289  if(!strncmp(s->string, type, 6))
290  {
291  if(strstr(s->string, "MOL_ID:"))
292  return(s);
293  }
294  }
295  return(NULL);
296 }
297 
298 
299 /************************************************************************/
300 /*>static BOOL ExtractField(STRINGLIST *molidStart,
301  STRINGLIST *molidStop, char *data,
302  char *type, char *field)
303  ------------------------------------------------------------
304 *//**
305  \param[in] *molidStart Start of a set of header records
306  \param[in] *molidStop Start of next set of headers (or NULL)
307  \param[out] *data Storage for extracted string
308  \param[in] *type Record type (COMPND or SOURCE)
309  \param[in] *field Sub-record field of interest
310  \return Success
311 
312  Extracts data for a field from a COMPND or SOURCE record. The field
313  data follows the field specification and is terminated by a ;
314 
315  Returns FALSE if field not found.
316 
317 - 28.04.15 Original By: ACRM
318 */
319 static BOOL ExtractField(STRINGLIST *molidStart, STRINGLIST *molidStop,
320  char *data, char *type, char *field)
321 {
322  STRINGLIST *s;
323  BOOL GotField = FALSE;
324  char *chp,
325  buffer[MAXPDBANNOTATION];
326 
327  data[0] = '\0';
328 
329  for(s=molidStart; s!=molidStop; NEXT(s))
330  {
331  if(strncmp(s->string, type, 6))
332  break;
333 
334  chp = NULL;
335 
336  if(GotField && isdigit(s->string[9]))
337  {
338  /* We have found the field already on previous line and this is
339  marked as a continuation line
340  */
341  chp = s->string+10;
342  }
343  else
344  {
345  /* Look for this field */
346  if((chp=strstr(s->string, field))!=NULL)
347  {
348  GotField = TRUE;
349  /* Step over the field name */
350  chp += strlen(field);
351  if(*chp == ' ')
352  chp++;
353  }
354  }
355 
356  if(GotField && (chp != NULL))
357  {
358  /* Copy into the buffer */
359  strncpy(buffer, chp, MAXPDBANNOTATION);
360  /* Remove spaces */
361  TERMINATE(buffer);
362  KILLTRAILSPACES(buffer);
363  /* Add to output data */
364  blStrncat(data, buffer, MAXPDBANNOTATION);
365 
366  /* Exit if the string contains a ; */
367  if((chp=strchr(data, ';'))!=NULL)
368  {
369  *chp = '\0';
370  return(TRUE);
371  }
372  }
373  }
374  return(FALSE);
375 }
376 
377 
378 /************************************************************************/
379 /*>BOOL blGetCompoundWholePDBChain(WHOLEPDB *wpdb, char *chain,
380  COMPND *compnd)
381  ------------------------------------------------------------
382 *//**
383  \param[in] *wpdb WHOLEPDB structure
384  \param[in] *chain Chain label of interest
385  \param[out] *compnd Data from the COMPND records
386  \return BOOL Success
387 
388  Extracts the COMPND data for a specified chain. Returns FALSE if the
389  chain isn't found
390 
391 - 28.04.15 Original By: ACRM
392 - 04.06.15 Modified to use the ExtractField() routine instead of
393  duplicating code here. Fixes a bug in dealing with compounds
394  where the referenced chains span more than one line
395 */
397  COMPND *compnd)
398 {
399  STRINGLIST *molidFirst,
400  *molidStart,
401  *molidStop;
402  int molid;
403 
404  compnd->molid = 0;
405  compnd->molecule[0] = '\0';
406  compnd->chain[0] = '\0';
407  compnd->fragment[0] = '\0';
408  compnd->synonym[0] = '\0';
409  compnd->ec[0] = '\0';
410  compnd->engineered[0] = '\0';
411  compnd->mutation[0] = '\0';
412  compnd->other[0] = '\0';
413 
414 #ifdef DEBUG
415  molid = blFindMolID(wpdb, chain);
416  fprintf(stderr,"DEBUG: Chain %s molid %d\n", chain, molid);
417  if(molid == 0)
418  return(FALSE);
419 #else
420  if((molid = blFindMolID(wpdb, chain)) == 0)
421  return(FALSE);
422 #endif
423 
424  molidFirst = FindNextMolIDRecord(wpdb->header, "COMPND");
425 
426  for(molidStart=molidFirst; molidStart!=NULL; molidStart=molidStop)
427  {
428  char buffer[MAXPDBANNOTATION];
429  int thisMolid = 0;
430 
431  molidStop = FindNextMolIDRecord(molidStart, "COMPND");
432 
433  ExtractField(molidStart, molidStop,
434  buffer, "COMPND", "MOL_ID:");
435  sscanf(buffer,"%d", &thisMolid);
436 
437  if(thisMolid == molid)
438  {
439  ExtractField(molidStart, molidStop,
440  compnd->molecule, "COMPND","MOLECULE:");
441  ExtractField(molidStart, molidStop,
442  compnd->chain, "COMPND", "CHAIN:");
443  ExtractField(molidStart, molidStop,
444  compnd->fragment, "COMPND", "FRAGMENT:");
445  ExtractField(molidStart, molidStop,
446  compnd->synonym, "COMPND", "SYNONYM:");
447  ExtractField(molidStart, molidStop,
448  compnd->ec, "COMPND", "EC:");
449  ExtractField(molidStart, molidStop,
450  compnd->engineered, "COMPND", "ENGINEERED:");
451  ExtractField(molidStart, molidStop,
452  compnd->mutation, "COMPND", "MUTATION:");
453  ExtractField(molidStart, molidStop,
454  compnd->other, "COMPND", "OTHER:");
455  ExtractField(molidStart, molidStop,
456  buffer, "COMPND", "MOL_ID:");
457  sscanf(buffer,"%d", &(compnd->molid));
458  return(TRUE);
459  }
460  }
461 
462  return(FALSE);
463 }
464 
465 
466 /************************************************************************/
467 /*>int blFindMolID(WHOLEPDB *wpdb, char *chain)
468  --------------------------------------------
469 *//**
470  \param[in] *wpdb WHOLEPDB structure
471  \param[in] *chain Chain label
472  \return MOL_ID or 0 if chain not found
473 
474  Finds the MOL_ID for a specified chain
475 
476 - 28.04.15 Original By: ACRM
477 - 04.06.15 Modified to use the ExtractField() routine instead of
478  duplicating code here. Fixes a bug in dealing with compounds
479  where the referenced chains span more than one line
480 */
481 int blFindMolID(WHOLEPDB *wpdb, char *chain)
482 {
483  STRINGLIST *molidFirst,
484  *molidStart,
485  *molidStop;
486 
487  molidFirst = FindNextMolIDRecord(wpdb->header, "COMPND");
488 
489  for(molidStart=molidFirst; molidStart!=NULL; molidStart=molidStop)
490  {
491  char buffer[MAXPDBANNOTATION],
492  *chp,
493  word[MAXWORD];
494 
495  molidStop = FindNextMolIDRecord(molidStart, "COMPND");
496  ExtractField(molidStart, molidStop, buffer, "COMPND", "CHAIN:");
497 
498  /* Check the chains to see if our chain is there */
499  chp = buffer;
500 
501  do {
502  int molid = 0;
503 
504  chp=blGetWord(chp, word, MAXWORD);
505  if(!strcmp(word, chain))
506  {
507  ExtractField(molidStart, molidStop,
508  buffer, "COMPND", "MOL_ID:");
509  sscanf(buffer,"%d", &molid);
510  return(molid);
511  }
512  } while(chp!=NULL);
513  }
514 
515  return(0);
516 }
517 
518 
519 /************************************************************************/
520 /*>BOOL blGetSpeciesWholePDBChain(WHOLEPDB *wpdb, char *chain,
521  PDBSOURCE *source)
522  -----------------------------------------------------------
523 *//**
524  \param[in] *wpdb WHOLEPDB structure
525  \param[in] *chain Chain label
526  \param[out] *source SOURCE information for chain
527  \return Success (chain found?)
528 
529  Extracts the SOURCE data for a specified chain
530 
531 - 26.03.15 Original By: ACRM
532 - 13.05.15 Fixes...
533 */
535  PDBSOURCE *source)
536 {
537  STRINGLIST *s,
538  *molidFirst = NULL,
539  *molidStart = NULL,
540  *molidStop = NULL;
541  int molid = 0;
542 
543  source->scientificName[0] = '\0';
544  source->commonName[0] = '\0';
545  source->strain[0] = '\0';
546  source->taxid = 0;
547 
548  if((molid = blFindMolID(wpdb, chain)) == 0)
549  return(FALSE);
550 
551  molidFirst = FindNextMolIDRecord(wpdb->header, "SOURCE");
552 
553  for(molidStart=molidFirst; molidStart!=NULL; molidStart=molidStop)
554  {
555  molidStop = FindNextMolIDRecord(molidStart, "SOURCE");
556  for(s=molidStart; s!=molidStop; NEXT(s))
557  {
558  char buffer[MAXPDBANNOTATION];
559  int thisMolid = 0;
560 
561  ExtractField(molidStart, molidStop, buffer,
562  "SOURCE", "MOL_ID:");
563  sscanf(buffer,"%d", &thisMolid);
564 
565  if(thisMolid == molid)
566  {
567  ExtractField(molidStart, molidStop, source->scientificName,
568  "SOURCE", "ORGANISM_SCIENTIFIC:");
569  ExtractField(molidStart, molidStop, source->commonName,
570  "SOURCE", "ORGANISM_COMMON:");
571  ExtractField(molidStart, molidStop, source->strain,
572  "SOURCE", "STRAIN:");
573  ExtractField(molidStart, molidStop, buffer,
574  "SOURCE", "ORGANISM_TAXID:");
575  sscanf(buffer,"%d",&source->taxid);
576  return(TRUE);
577  }
578  }
579  }
580 
581  return(FALSE);
582 }
583 
584 
585 /************************************************************************/
586 /*>BOOL blGetCompoundWholePDBMolID(WHOLEPDB *wpdb, int molid,
587  COMPND *compnd)
588  ------------------------------------------------------------
589 *//**
590  \param[in] *wpdb WHOLEPDB structure
591  \param[in] molid MOL_ID from PDB-format COMPND record.
592  \param[out] *compnd Data from the COMPND records
593  \return BOOL Success
594 
595  Extracts the COMPND data for a specified MOL_ID. Returns FALSE if the
596  MOL_ID isn't found
597 
598 - 13.05.15 Original based on blGetCompoundWholePDBChain(). By: CTP
599 */
601  COMPND *compnd)
602 {
603  STRINGLIST *molidFirst,
604  *molidStart,
605  *molidStop,
606  *s;
607 
608  /* reset compnd */
609  compnd->molid = 0;
610  compnd->molecule[0] = '\0';
611  compnd->chain[0] = '\0';
612  compnd->fragment[0] = '\0';
613  compnd->synonym[0] = '\0';
614  compnd->ec[0] = '\0';
615  compnd->engineered[0] = '\0';
616  compnd->mutation[0] = '\0';
617  compnd->other[0] = '\0';
618 
619  /* find start of compnd records */
620  molidFirst = FindNextMolIDRecord(wpdb->header, "COMPND");
621 
622  /* get compound record */
623  for(molidStart=molidFirst; molidStart!=NULL; molidStart=molidStop)
624  {
625  molidStop = FindNextMolIDRecord(molidStart, "COMPND");
626  for(s=molidStart; s!=molidStop; NEXT(s))
627  {
628  char buffer[MAXPDBANNOTATION];
629  int thisMolid = 0;
630 
631  ExtractField(molidStart, molidStop,
632  buffer, "COMPND", "MOL_ID:");
633  sscanf(buffer,"%d", &thisMolid);
634 
635  if(thisMolid == molid)
636  {
637  ExtractField(molidStart, molidStop,
638  compnd->molecule, "COMPND","MOLECULE:");
639  ExtractField(molidStart, molidStop,
640  compnd->chain, "COMPND", "CHAIN:");
641  ExtractField(molidStart, molidStop,
642  compnd->fragment, "COMPND", "FRAGMENT:");
643  ExtractField(molidStart, molidStop,
644  compnd->synonym, "COMPND", "SYNONYM:");
645  ExtractField(molidStart, molidStop,
646  compnd->ec, "COMPND", "EC:");
647  ExtractField(molidStart, molidStop,
648  compnd->engineered, "COMPND", "ENGINEERED:");
649  ExtractField(molidStart, molidStop,
650  compnd->mutation, "COMPND", "MUTATION:");
651  ExtractField(molidStart, molidStop,
652  compnd->other, "COMPND", "OTHER_DETAILS:");
653  ExtractField(molidStart, molidStop,
654  buffer, "COMPND", "MOL_ID:");
655  sscanf(buffer,"%d", &(compnd->molid));
656  return(TRUE);
657  }
658  }
659  }
660 
661  return(FALSE);
662 }
663 
664 /************************************************************************/
665 /*>BOOL blGetSpeciesWholePDBMolID(WHOLEPDB *wpdb, int molid,
666  PDBSOURCE *source)
667  -----------------------------------------------------------
668 *//**
669  \param[in] *wpdb WHOLEPDB structure
670  \param[in] molid MOL_ID from PDB-format SOURCE record.
671  \param[out] *source SOURCE information for chain
672  \return Success (chain found?)
673 
674  Extracts the SOURCE data for a specified MOL_ID. Returns FALSE if not
675  found.
676 
677 - 12.05.15 Original based on blGetSpeciesWholePDBChain(). By: CTP
678 */
680  PDBSOURCE *source)
681 {
682  STRINGLIST *s,
683  *molidFirst = NULL,
684  *molidStart = NULL,
685  *molidStop = NULL;
686 
687  /* reset source */
688  source->scientificName[0] = '\0';
689  source->commonName[0] = '\0';
690  source->strain[0] = '\0';
691  source->taxid = 0;
692 
693  /* find start of source records */
694  molidFirst = FindNextMolIDRecord(wpdb->header, "SOURCE");
695 
696 
697  /* get source record */
698  for(molidStart=molidFirst; molidStart!=NULL; molidStart=molidStop)
699  {
700  molidStop = FindNextMolIDRecord(molidStart, "SOURCE");
701  for(s=molidStart; s!=molidStop; NEXT(s))
702  {
703  char buffer[MAXPDBANNOTATION];
704  int thisMolid = 0;
705 
706  ExtractField(molidStart, molidStop, buffer,
707  "SOURCE", "MOL_ID:");
708  sscanf(buffer,"%d", &thisMolid);
709 
710  if(thisMolid == molid)
711  {
712  ExtractField(molidStart, molidStop, source->scientificName,
713  "SOURCE","ORGANISM_SCIENTIFIC:");
714  ExtractField(molidStart, molidStop, source->commonName,
715  "SOURCE", "ORGANISM_COMMON:");
716  ExtractField(molidStart, molidStop, source->strain,
717  "SOURCE", "STRAIN:");
718  ExtractField(molidStart, molidStop, buffer,
719  "SOURCE", "ORGANISM_TAXID:");
720  sscanf(buffer,"%d",&source->taxid);
721  return(TRUE);
722  }
723  }
724  }
725 
726  return(FALSE);
727 }
728 
729 
730 /************************************************************************/
731 /*>char *blGetSeqresAsStringWholePDB(WHOLEPDB *wpdb, char **chains,
732  MODRES *modres, BOOL doNucleic)
733  -----------------------------------------------------------------
734 *//**
735  \param[in] *wpdb Pointer to whole PDB structure
736  \param[out] **chains Chain labels for the chains - may also be
737  set to NULL if you don't want to record
738  chain labels
739  \param[in] *modres Linked list of MODRES information. May be
740  NULL if you don't want to translate
741  non-standard amino acids.
742  \param[in] doNucleic Read sequence for nucleic acid chains
743  \return malloc()'d Sequence from SEQRES, chains
744  separated by a *
745 
746  Reads sequence from SEQRES records into a character string in 1-letter
747  code. Chains are terminated by * characters.
748 
749 - 21.08.97 Original by: ACRM
750 - 22.08.97 Added chains parameter
751 - 26.08.97 No longer reads DNA/RNA
752 - 07.03.07 Added code to check for modified amino acids
753  Now reads from wpdb rather than from the file
754 - 07.11.14 Initialize lastchain
755 - 11.06.15 Moved to bioplib - doNucleic is now a parameter instead of
756  a global; chains is now an array of strings
757 - 12.06.15 Frees memory and returns NULL if no SEQRES found
758 */
759 char *blGetSeqresAsStringWholePDB(WHOLEPDB *wpdb, char **chains,
760  MODRES *modres, BOOL doNucleic)
761 {
762  static char *sequence = NULL;
763  char buffer[MAXBUFF],
764  chain[blMAXCHAINLABEL],
765  lastchain[blMAXCHAINLABEL],
766  seq3[13][4];
767  int i,
768  nchain = 0,
769  nres = 0,
770  ArraySize = ALLOCSIZE;
771  BOOL AddStar = FALSE;
772  STRINGLIST *s;
773 
774  lastchain[0] = '\0';
775 
776  if((sequence=(char *)malloc(ArraySize * sizeof(char)))==NULL)
777  {
778  return(NULL);
779  }
780  sequence[0] = '\0';
781 
782  for(s=wpdb->header; s!=NULL; NEXT(s))
783  {
784  strncpy(buffer, s->string, MAXBUFF);
785  TERMINATE(buffer);
786  if(!strncmp(buffer,"SEQRES",6))
787  {
788  fsscanf(buffer,
789  "%11x%1s%7x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x\
790 %3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s",
791  chain,
792  seq3[0], seq3[1], seq3[2], seq3[3], seq3[4],
793  seq3[5], seq3[6], seq3[7], seq3[8], seq3[9],
794  seq3[10], seq3[11], seq3[12]);
795 
796  if((nres == 0) && !AddStar)
797  {
798  /* This is the first line so we set the lastchain */
799  strcpy(lastchain, chain);
800  if(chains!=NULL)
801  strncpy(chains[nchain++], chain, blMAXCHAINLABEL);
802  }
803  else if(nres+15 >= ArraySize)
804  {
805  /* Allocate more space if needed */
806  ArraySize += ALLOCSIZE;
807  if((sequence=(char *)realloc((void *)sequence,
808  ArraySize*sizeof(char)))
809  == NULL)
810  {
811  return(NULL);
812  }
813  }
814 
815  if(!CHAINMATCH(chain, lastchain))
816  {
817  sequence[nres++] = '*';
818  strcpy(lastchain, chain);
819  if(chains!=NULL)
820  strncpy(chains[nchain++], chain, blMAXCHAINLABEL);
821  }
822 
823  for(i=0; i<13; i++)
824  {
825  AddStar = TRUE;
826  if(!strncmp(seq3[i]," ",3))
827  break;
828  sequence[nres] = blThronex(seq3[i]);
829 
830  /* 07.03.07 Added code to check for modified amino acids */
831  if(sequence[nres] == 'X')
832  {
833  char tmpthree[8];
834  if(modres != NULL) /* 11.06.15 */
835  {
836  blFindOriginalResType(seq3[i], tmpthree, modres);
837  sequence[nres] = blThronex(tmpthree);
838  }
839  }
840 
841  if(!gBioplibSeqNucleicAcid || doNucleic)
842  nres++;
843  }
844  }
845  }
846 
847  /* If no SEQRES records found, then free the memory and return NULL */
848  if(!strlen(sequence))
849  {
850  free(sequence);
851  sequence = NULL;
852  }
853  else
854  {
855  if(AddStar)
856  {
857  sequence[nres++] = '*';
858  }
859  sequence[nres++] = '\0';
860  if(chains!=NULL)
861  chains[nchain][0] = '\0';
862  }
863 
864  return(sequence);
865 }
866 
867 
868 
869 /************************************************************************/
870 /*>MODRES *blGetModresWholePDB(WHOLEPDB *wpdb)
871  -------------------------------------------
872 *//**
873  \param[in]  *wpdb   WHOLEPDB structure
874  \return             Linked list of MODRES data (NULL if there are
                         no MODRES records)
875 
876  Reads MODRES records from a Whole PDB structure and returns a linked
877  list containing the information
878 
879 - 07.03.07 Original By: ACRM
880 - 11.06.15 Moved to Bioplib
881 */
883 {
884  STRINGLIST *s;
885  char *ch;
886  MODRES *modres = NULL,
887  *m = NULL;
888 
889 
890  for(s=wpdb->header; s!=NULL; NEXT(s))
891  {
892  if(!strncmp(s->string, "MODRES", 6))
893  {
894  if(m==NULL)
895  {
896  INIT(modres, MODRES);
897  m = modres;
898  }
899  else
900  {
901  ALLOCNEXT(m, MODRES);
902  }
903  if(m==NULL)
904  {
905  fprintf(stderr,"pdb2pir: Error! No memory for modres\n");
906  exit(1);
907  }
908 
909  ch = s->string+12;
910  strncpy(m->modres, ch, 3);
911  PADCHARMINTERM(m->modres, ' ', 4);
912 
913  ch = s->string+24;
914  strncpy(m->origres, ch, 3);
915  PADCHARMINTERM(m->origres, ' ', 4);
916  if(m->origres[0] == ' ')
917  {
918  strncpy(m->origres, "XXX ", 4);
919  }
920  }
921  }
922  return(modres);
923 }
924 
925 
926 /************************************************************************/
927 /*>void blFindOriginalResType(char *modAA, char *stdAA, MODRES *modres)
928  --------------------------------------------------------------------
929 *//**
930  \param[in] *modAA Non-standard (modified) amino acid name
931  \param[out] *stdAA Standard amino acid from which it was derived
932  \param[in] *modres MODRES linked list
933 
934  Uses the MODRES information to identify the original (standard)
935  amino acid from which a modified amino acid was derived
936 
937 - 07.03.07 Original By: ACRM
938 - 11.06.15 Moved to bioplib, renamed routine and parameters
939 */
940 void blFindOriginalResType(char *modAA, char *stdAA, MODRES *modres)
941 {
942  MODRES *m;
943  for(m=modres; m!=NULL; NEXT(m))
944  {
945  if(!strncmp(modAA, m->modres, 3))
946  {
947  strncpy(stdAA, m->origres, 3);
948  PADCHARMINTERM(stdAA, ' ', 4);
949  return;
950  }
951  }
952 }
953 
954 /************************************************************************/
955 /*>static BIOMOLECULE *doRemark300(WHOLEPDB *wpdb)
956  -----------------------------------------------
957 *//**
958  \param[in] *wpdb WHOLEPDB linked list
959  \return Pointer to a BIOMOLECULE structure that we
960  have populated
961 
962  Identifies REMARK 300 header lines and extracts the maximum number
963  of biomolecules and any 'details' comments that appear after the
964  standard REMARK 300 comments
965 
966  Returns a BIOMOLECULE linked list that only has one item for REMARK
967  300. The 'details' that this code finds are just placed in the
968  initial entry of the linked list.
969 
970 - 26.06.15 Original By: ACRM
971 */
972 static BIOMOLECULE *doRemark300(WHOLEPDB *wpdb)
973 {
974  BIOMOLECULE *biomolecule = NULL;
975  STRINGLIST *s;
976  int SkipStandardRemark = 0;
977 
978  for(s=wpdb->header; s!=NULL; NEXT(s))
979  {
980  if(!strncmp(s->string, "REMARK 300", 10))
981  {
982  if(!strncmp(s->string, "REMARK 300 BIOMOLECULE:", 23))
983  {
984  char buffer[80],
985  *chp;
986 
987  /* Allocate space for the BIOMOLECULE structure */
988  if(biomolecule == NULL)
989  {
990  INIT(biomolecule, BIOMOLECULE);
991  if(biomolecule == NULL)
992  return(NULL);
993  CLEAR_BIOMOL(biomolecule);
994  }
995 
996  SkipStandardRemark = 1;
997 
998  /* Copy the actual data and remove trailing spaces */
999  strncpy(buffer, s->string+24, 80);
1000  TERMINATE(buffer);
1001  KILLTRAILSPACES(buffer);
1002 
1003  /* Now move to the last space */
1004  if((chp = strrchr(buffer, ' '))==NULL)
1005  {
1006  chp = buffer;
1007  }
1008  else
1009  {
1010  chp++;
1011  }
1012 
1013  sscanf(chp, "%d", &(biomolecule->numBiomolecules));
1014  }
1015 
1016  if(SkipStandardRemark)
1017  {
1018  if(SkipStandardRemark++ > 5)
1019  {
1020  char buffer[80];
1021 
1022  strncpy(buffer, s->string+11, 80);
1023  TERMINATE(buffer);
1024  KILLTRAILSPACES(buffer);
1025  if(strlen(buffer))
1026  {
1027  biomolecule->details =
1028  blStoreString(biomolecule->details, buffer);
1029  }
1030  }
1031  }
1032  }
1033  }
1034 
1035  return(biomolecule);
1036 }
1037 
1038 
1039 /************************************************************************/
1040 /*>static BIOMOLECULE *doRemark350(WHOLEPDB *wpdb,
1041  BIOMOLECULE *biomolecule)
1042  ---------------------------------------------------------
1043 *//**
1044  \param[in] *wpdb WHOLEPDB linked list
1045  \param[in] *biomolecule Pointer to a BIOMOLECULE linked list.
1046  (Can be NULL)
1047  \return Pointer to a BIOMOLECULE structure that we
1048  have populated
1049 
1050  Parses the REMARK 350 header lines. If more than one biomolecule is
1051  found, the biomolecule linked list is extended for each new one.
1052 
1053  This routine stores the author and software determined assembly size
1054  and the list of chains associated with a given biomolecule. It also
1055  contains a linked list of BIOMT structures which have the
1056  transformation matrices needed to recreate the biomolecule.
1057 
1058  Note that the BIOMOLECULE.details and BIOMOLECULE.numBiomolecules
1059  will only be populated for the first item in the list (and by
1060  doRemark300() not by this routine).
1061 
1062 - 26.06.15 Original By: ACRM
1063 */
1064 static BIOMOLECULE *doRemark350(WHOLEPDB *wpdb, BIOMOLECULE *biomolecule)
1065 {
1066  BIOMOLECULE *bm = NULL; /* The current biomolecule */
1067  BIOMT *biomt = NULL;
1068  BOOL firstRecord = TRUE;
1069  STRINGLIST *s;
1070 
1071  if(biomolecule != NULL)
1072  bm=biomolecule;
1073 
1074  for(s=wpdb->header; s!=NULL; NEXT(s))
1075  {
1076  if(!strncmp(s->string, "REMARK 350", 10))
1077  {
1078  /* Allocate space for the BIOMOLECULE structure. Should have been
1079  allocated by REMARK 300 code, but this is in case REMARK 300
1080  is missing
1081  */
1082  if(biomolecule == NULL)
1083  {
1084  INIT(biomolecule, BIOMOLECULE);
1085  if(biomolecule == NULL)
1086  return(NULL);
1087  CLEAR_BIOMOL(biomolecule);
1088  bm = biomolecule;
1089  }
1090 
1091  if(!strncmp(s->string,
1092  "REMARK 350 BIOMOLECULE:", 23))
1093  {
1094  if(!firstRecord)
1095  {
1096  /* Allocate space for new biomolecule */
1097  ALLOCNEXT(bm, BIOMOLECULE);
1098  if(bm == NULL)
1099  {
1100  blFreeBiomolecule(biomolecule);
1101  return(NULL);
1102  }
1103 
1104  CLEAR_BIOMOL(bm);
1105  }
1106  sscanf(s->string+23, "%d", &(bm->biomolNumber));
1107 
1108  firstRecord = FALSE;
1109  }
1110  else if(!strncmp(s->string,
1111  "REMARK 350 AUTHOR DETERMINED", 28))
1112  {
1113  strncpy(bm->authorUnit, s->string+46, 40);
1114  TERMINATE(bm->authorUnit);
1116  }
1117  else if(!strncmp(s->string,
1118  "REMARK 350 SOFTWARE DETERMINED", 30))
1119  {
1120  strncpy(bm->softwareUnit, s->string+53, 40);
1121  TERMINATE(bm->softwareUnit);
1123  }
1124  else if(!strncmp(s->string,
1125  "REMARK 350 APPLY THE FOLLOWING TO CHAINS:",41))
1126  {
1127  char buffer[80];
1128 
1129  /* Copy the chain information skipping any spaces */
1130  STRNCPYNOSPACES(buffer, (s->string+42), 80);
1131  TERMINATE(buffer);
1132 
1133  /* Remove any chain information already stored */
1134  if(bm->chains != NULL)
1135  {
1136  free(bm->chains);
1137  bm->chains = NULL;
1138  }
1139 
1140  /* Append the chain info */
1141  bm->chains = blStrcatalloc(bm->chains, buffer);
1142  }
1143  else if(!strncmp(s->string,
1144  "REMARK 350 AND CHAINS:",41))
1145  {
1146  char buffer[80];
1147 
1148  /* Copy the chain information skipping any spaces */
1149  STRNCPYNOSPACES(buffer, (s->string+42), 80);
1150  TERMINATE(buffer);
1151 
1152  /* Append the chain info */
1153  bm->chains = blStrcatalloc(bm->chains, buffer);
1154  }
1155  else if(!strncmp(s->string, "REMARK 350 BIOMT", 18))
1156  {
1157  char buffer[80];
1158  int line,
1159  entry;
1160  REAL val[4];
1161 
1162  strncpy(buffer, s->string+18, 80);
1163  TERMINATE(buffer);
1164  if(sscanf(buffer, "%d %d %lf %lf %lf %lf",
1165  &line, &entry, &val[0], &val[1], &val[2], &val[3]))
1166  {
1167  /* Nothing defined yet so create entry & set entry number*/
1168  if(bm->biomt == NULL)
1169  {
1170  INIT(bm->biomt, BIOMT);
1171  biomt = bm->biomt;
1172  if(biomt!=NULL)
1173  biomt->biomtNum = entry;
1174  }
1175 
1176  /* If this entry is a different entry number, allocate a
1177  new item
1178  */
1179  if(entry != biomt->biomtNum)
1180  ALLOCNEXT(biomt, BIOMT);
1181 
1182  /* Copy in the data */
1183  if(biomt != NULL)
1184  {
1185  int i;
1186 
1187  biomt->biomtNum = entry;
1188  for(i=0; i<3; i++)
1189  biomt->rotMatrix[line-1][i] = val[i];
1190  biomt->transMatrix[line-1] = val[3];
1191  }
1192  }
1193  }
1194  }
1195  }
1196 
1197  return(biomolecule);
1198 }
1199 
1200 
1201 
1202 /************************************************************************/
1203 /*>BIOMOLECULE *blGetBiomoleculeWholePDB(WHOLEPDB *wpdb)
1204  -----------------------------------------------------
1205 *//**
1206  \param[in] *wpdb Pointer to WHOLEPDB structure
1207  \return Pointer to malloc()'d BIOMOLECULE linked list
1208 
1209  Reads the biomolecule assembly data from REMARK 300 and REMARK 350
1210  headers.
1211 
1212  The returned pointer is a linked list of BIOMOLECULE structures.
1213  The first item in the list contains the information from REMARK 300:
1214  the total number of biomolecule assemblies and any additional
1215  information provided in REMARK 300 beyond the standard comments.
1216  The additional information (biomolecule->details) is a STRINGLIST
1217  linked list.
1218 
1219  The list of chains will appear in biomolcule->chains as a comma
1220  separated list and the transformation to be applied to them
1221  will appear in the biomolecule->biomt structure. This is also
1222  a linked list so if multiple transformations are required these
1223  will appear in further entries to this list.
1224 
1225  Additional entries will be found in the BIOMOLECULE linked list
1226  for each different biomolecule.
1227 
1228  See the test code in PDBHeaderInfo.c for example usage.
1229 
1230 - 26.06.15 Original By: ACRM
1231 */
1233 {
1234  BIOMOLECULE *biomolecule = NULL;
1235 
1236  biomolecule = doRemark300(wpdb);
1237  biomolecule = doRemark350(wpdb, biomolecule);
1238 
1239  return(biomolecule);
1240 }
1241 
1242 
1243 /************************************************************************/
1244 /*>void blFreeBiomolecule(BIOMOLECULE *biomolecule)
1245  ------------------------------------------------
1246  \param[in] *biomolecule Pointer to BIOMOLECULE linked list
1247 
1248  Frees the data associated with a BIOMOLECULE linked list
1249 
1250 - 26.06.15 Original By: ACRM
1251 */
1252 void blFreeBiomolecule(BIOMOLECULE *biomolecule)
1253 {
1254  BIOMOLECULE *bm;
1255 
1256  if(biomolecule == NULL)
1257  return;
1258 
1259  /* Free the REMARK 300 data stored in the first item in the list */
1260  if(biomolecule->details != NULL)
1261  FREESTRINGLIST(biomolecule->details);
1262 
1263  /* Free chain and BIOMT data associated with each biomolecule */
1264  for(bm=biomolecule; bm!=NULL; NEXT(bm))
1265  {
1266  if(bm->chains != NULL)
1267  free(bm->chains);
1268 
1269  if(bm->biomt)
1270  FREELIST((bm->biomt), BIOMT);
1271  }
1272 
1273  /* Free the biomolecule linked list */
1274  FREELIST(biomolecule, BIOMOLECULE);
1275 }
1276 
1277 
1278 
1279 
1280 
1281 
1282 /************************************************************************/
1283 /*>char *blGetSeqresByChainWholePDB(WHOLEPDB *wpdb, MODRES *modres,
1284  BOOL doNucleic)
1285  ----------------------------------------------------------------
1286 *//**
1287  \param[in] *wpdb Pointer to whole PDB structure
1288  \param[in] *modres Linked list of MODRES information. May be
1289  NULL if you don't want to translate
1290  non-standard amino acids.
1291  \param[in] doNucleic Read sequence for nucleic acid chains
1292  \return A hash of 1-letter code sequences indexed by
1293  chain label
1294 
1295  Reads sequence from SEQRES records in 1-letter code, storing the
1296  results in a hash indexed by chain label.
1297 
1298 - 25.11.15 Original by: ACRM
1299 */
1301  BOOL doNucleic)
1302 {
1303  static char *sequence = NULL;
1304  char buffer[MAXBUFF],
1305  chain[blMAXCHAINLABEL],
1306  lastchain[blMAXCHAINLABEL],
1307  seq3[13][4];
1308  int i,
1309  nres = 0,
1310  ArraySize = ALLOCSIZE;
1311  BOOL gotSequence = FALSE;
1312  STRINGLIST *s;
1313 
1314  HASHTABLE *hash;
1315 
1316  /* Initialize hash with 11 bins */
1317  if((hash = blInitializeHash(11))==NULL)
1318  return(NULL);
1319  /* Initialize string to store the sequence */
1320  if((sequence=(char *)malloc(ArraySize * sizeof(char)))==NULL)
1321  return(NULL);
1322 
1323  lastchain[0] = '\0';
1324  sequence[0] = '\0';
1325 
1326  for(s=wpdb->header; s!=NULL; NEXT(s))
1327  {
1328  strncpy(buffer, s->string, MAXBUFF);
1329  TERMINATE(buffer);
1330  if(!strncmp(buffer,"SEQRES",6))
1331  {
1332  fsscanf(buffer,
1333  "%11x%1s%7x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x\
1334 %3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s",
1335  chain,
1336  seq3[0], seq3[1], seq3[2], seq3[3], seq3[4],
1337  seq3[5], seq3[6], seq3[7], seq3[8], seq3[9],
1338  seq3[10], seq3[11], seq3[12]);
1339 
1340  if(lastchain[0] == '\0')
1341  {
1342  /* This is the first line so we set the lastchain */
1343  strcpy(lastchain, chain);
1344  }
1345 
1346  if(nres+15 >= ArraySize)
1347  {
1348  /* Allocate more space if needed */
1349  ArraySize += ALLOCSIZE;
1350  if((sequence=(char *)realloc((void *)sequence,
1351  ArraySize*sizeof(char)))
1352  == NULL)
1353  {
1354  return(NULL);
1355  }
1356  }
1357 
1358  /* Test if chain has changed */
1359  if(!CHAINMATCH(chain, lastchain))
1360  {
1361  sequence[nres++] = '\0';
1362  if(!blSetHashValueString(hash, lastchain, sequence))
1363  {
1364  blFreeHash(hash);
1365  free(sequence);
1366  return(NULL);
1367  }
1368 
1369  sequence[0] = '\0';
1370  strcpy(lastchain, chain);
1371  nres = 0;
1372  }
1373 
1374  for(i=0; i<13; i++)
1375  {
1376  if(!strncmp(seq3[i]," ",3))
1377  break;
1378  sequence[nres] = blThronex(seq3[i]);
1379 
1380  /* 07.03.07 Added code to check for modified amino acids */
1381  if(sequence[nres] == 'X')
1382  {
1383  char tmpthree[8];
1384  if(modres != NULL) /* 11.06.15 */
1385  {
1386  blFindOriginalResType(seq3[i], tmpthree, modres);
1387  sequence[nres] = blThronex(tmpthree);
1388  }
1389  }
1390 
1391  if(!gBioplibSeqNucleicAcid || doNucleic)
1392  {
1393  gotSequence=TRUE;
1394  nres++;
1395  }
1396  }
1397  }
1398  }
1399 
1400  /* If no SEQRES records found, then free the memory and return NULL */
1401  if(!gotSequence)
1402  {
1403  free(hash);
1404  hash = NULL;
1405  }
1406  else
1407  {
1408  sequence[nres++] = '\0';
1409  blSetHashValueString(hash, lastchain, sequence);
1410  }
1411  free(sequence);
1412 
1413  return(hash);
1414 }
1415 
1416 
1417 
/************************************************************************/
#ifdef TEST
/* Test driver: reads the PDB file named as the first command line
   argument and prints the header information, biomolecule data,
   per-chain compound and species data and the SEQRES sequences.
   Returns 0 on success, 1 if no file was given or it could not be
   opened or read.
*/
int main(int argc, char **argv)
{
   WHOLEPDB    *wpdb;
   FILE        *in;
   char        header[80],
               date[16],
               pdbcode[8],
               *title,
               **chainLabels;
   int         nChains = 0,
               i;
   PDBSOURCE   species;
   BIOMOLECULE *biomolecule = NULL,
               *bm          = NULL;
   STRINGLIST  *s;
   HASHTABLE   *seqres      = NULL;

   if(argc < 2)
   {
      fprintf(stderr, "Usage: %s file.pdb\n",
              ((argc > 0) ? argv[0] : "PDBHeaderInfo"));
      return(1);
   }

   if((in=fopen(argv[1], "r"))==NULL)
   {
      fprintf(stderr, "Unable to open file: %s\n", argv[1]);
      return(1);
   }

   if((wpdb = blReadWholePDB(in))==NULL)
   {
      fprintf(stderr, "Unable to read PDB file: %s\n", argv[1]);
      fclose(in);
      return(1);
   }

   if(blGetHeaderWholePDB(wpdb,
                          header,  80,
                          date,    16,
                          pdbcode, 8))
   {
      printf("Header: '%s'\n", header);
      printf("Date: '%s'\n", date);
      printf("PDB code: '%s'\n", pdbcode);
   }

   /* NOTE(review): title is not freed here as it was not freed
      originally - confirm whether blGetTitleWholePDB() returns
      malloc()'d memory
   */
   if((title = blGetTitleWholePDB(wpdb))!=NULL)
   {
      printf("Title: '%s'\n", title);
   }

   if((biomolecule = blGetBiomoleculeWholePDB(wpdb))!=NULL)
   {
      printf("Number of Biomolecules: %d\n",
             biomolecule->numBiomolecules);
      for(s=biomolecule->details; s!=NULL; NEXT(s))
      {
         printf("REMARK 300 Details: %s\n", s->string);
      }

      for(bm=biomolecule; bm!=NULL; NEXT(bm))
      {
         BIOMT *bmt;

         printf("Biomolecule: %d\n", bm->biomolNumber);
         printf(" Author Unit: %s\n", bm->authorUnit);
         printf(" Software Unit: %s\n", bm->softwareUnit);
         printf(" Chains: %s\n",
                (bm->chains?bm->chains:""));

         for(bmt=bm->biomt; bmt!=NULL; NEXT(bmt))
         {
            int row;

            printf(" Matrix %d\n", bmt->biomtNum);
            for(row=0; row<3; row++)
            {
               printf(" %8.6f %8.6f %8.6f %8.6f\n",
                      bmt->rotMatrix[row][0],
                      bmt->rotMatrix[row][1],
                      bmt->rotMatrix[row][2],
                      bmt->transMatrix[row]);
            }
         }
      }

      blFreeBiomolecule(biomolecule);
   }

   chainLabels = blGetPDBChainLabels(wpdb->pdb, &nChains);
   if(chainLabels != NULL)
   {
      for(i=0; i<nChains; i++)
      {
         COMPND compound;

         printf("\n\n>>>Chain: %s\n", chainLabels[i]);

         /* Only print the compound data if it was actually found       */
         if(blGetCompoundWholePDBChain(wpdb, chainLabels[i], &compound))
         {
            printf("molid: %d\n", compound.molid);
            printf("molecule: %s\n", compound.molecule);
            printf("chain: %s\n", compound.chain);
            printf("fragment: %s\n", compound.fragment);
            printf("synonym: %s\n", compound.synonym);
            printf("ec: %s\n", compound.ec);
            printf("engineered: %s\n", compound.engineered);
            printf("mutation: %s\n", compound.mutation);
            printf("other: %s\n", compound.other);
         }

         if(blGetSpeciesWholePDBChain(wpdb, chainLabels[i], &species))
         {
            printf("Scientific name: %s\n", species.scientificName);
            printf("Common name: %s\n", species.commonName);
            printf("Strain: %s\n", species.strain);
            printf("Tax ID: %d\n", species.taxid);
         }

         free(chainLabels[i]);
      }
      free(chainLabels);
   }

   if((seqres = blGetSeqresByChainWholePDB(wpdb, NULL, FALSE))!=NULL)
   {
      char **chains = NULL;

      if((chains = blGetHashKeyList(seqres))!=NULL)
      {
         printf("\n\nSEQRES Sequence data:\n");

         for(i=0; chains[i]!=NULL; i++)
         {
            printf("Chain: %2s Seq: %s\n", chains[i],
                   blGetHashValueString(seqres, chains[i]));
         }

         blFreeHashKeyList(chains);
      }

      blFreeHash(seqres);
   }

   fclose(in);

   return(0);
}
#endif
1551 
void blFreeBiomolecule(BIOMOLECULE *biomolecule)
int biomtNum
Definition: pdb.h:411
#define ALLOCNEXT(x, y)
Definition: macros.h:251
#define MAXPDBANNOTATION
Definition: pdb.h:245
BOOL blGetCompoundWholePDBChain(WHOLEPDB *wpdb, char *chain, COMPND *compnd)
int main(int argc, char **argv)
Definition: test.c:4
char strain[MAXPDBANNOTATION]
Definition: pdb.h:395
MODRES * blGetModresWholePDB(WHOLEPDB *wpdb)
Include file for PDB routines.
BOOL blGetSpeciesWholePDBMolID(WHOLEPDB *wpdb, int molid, PDBSOURCE *source)
#define FREESTRINGLIST(l)
Definition: general.h:88
REAL rotMatrix[3][3]
Definition: pdb.h:412
char blThronex(char *three)
Definition: throne.c:188
char * chains
Definition: pdb.h:424
char fragment[MAXPDBANNOTATION]
Definition: pdb.h:383
short BOOL
Definition: SysDefs.h:64
#define NULL
Definition: array2.c:99
int biomolNumber
Definition: pdb.h:423
char ** blGetHashKeyList(HASHTABLE *hashtable)
Definition: hash.c:226
int numBiomolecules
Definition: pdb.h:421
#define KILLTRAILSPACES(x)
Definition: macros.h:414
int blFindMolID(WHOLEPDB *wpdb, char *chain)
char modres[8]
Definition: pdb.h:404
char * blGetSeqresAsStringWholePDB(WHOLEPDB *wpdb, char **chains, MODRES *modres, BOOL doNucleic)
char engineered[MAXPDBANNOTATION]
Definition: pdb.h:383
Defines for using hash functions.
STRINGLIST * details
Definition: pdb.h:420
char * blStrncat(char *out, const char *in, size_t len)
Definition: stringcat.c:97
BOOL gBioplibSeqNucleicAcid
Definition: throne.c:130
STRINGLIST * header
Definition: pdb.h:375
char * blGetHashValueString(HASHTABLE *hashtable, char *key)
Definition: hash.c:620
#define FALSE
Definition: macros.h:223
Definition: pdb.h:372
#define NEXT(x)
Definition: macros.h:249
Definition: pdb.h:401
BOOL blSetHashValueString(HASHTABLE *hashtable, char *key, char *value)
Definition: hash.c:452
Useful macros.
BIOMT * biomt
Definition: pdb.h:427
char softwareUnit[40]
Definition: pdb.h:424
WHOLEPDB * blReadWholePDB(FILE *fpin)
Definition: ReadPDB.c:2328
char * blCollapseSpaces(char *inText)
Definition: stringutil.c:108
#define MIN(a, b)
Definition: macros.h:240
#define TERMINATE(x)
Definition: macros.h:366
BOOL blGetCompoundWholePDBMolID(WHOLEPDB *wpdb, int molid, COMPND *compnd)
int fsscanf(char *buffer, char *format,...)
Definition: fsscanf.c:177
double REAL
Definition: MathType.h:67
HASHTABLE * blInitializeHash(ULONG hashsize)
Definition: hash.c:163
char * blStrcatalloc(char *instr, char *catstr)
Definition: strcatalloc.c:100
PDB * pdb
Definition: pdb.h:374
Header file for sequence handling.
Definition: hash.h:85
char other[MAXPDBANNOTATION]
Definition: pdb.h:383
char commonName[MAXPDBANNOTATION]
Definition: pdb.h:395
void blFreeHashKeyList(char **keylist)
Definition: hash.c:200
struct _stringlist * next
Definition: general.h:84
void blFreeHash(HASHTABLE *hashtable)
Definition: hash.c:293
int molid
Definition: pdb.h:382
Include file for fsscanf()
HASHTABLE * blGetSeqresByChainWholePDB(WHOLEPDB *wpdb, MODRES *modres, BOOL doNucleic)
char synonym[MAXPDBANNOTATION]
Definition: pdb.h:383
#define TRUE
Definition: macros.h:219
char * blGetWord(char *buffer, char *word, int maxsize)
Definition: GetWord.c:268
Definition: pdb.h:408
Definition: pdb.h:380
char ec[MAXPDBANNOTATION]
Definition: pdb.h:383
BOOL blGetSpeciesWholePDBChain(WHOLEPDB *wpdb, char *chain, PDBSOURCE *source)
int taxid
Definition: pdb.h:398
char chain[MAXPDBANNOTATION]
Definition: pdb.h:383
char authorUnit[40]
Definition: pdb.h:424
STRINGLIST * blStoreString(STRINGLIST *StringList, char *string)
Definition: StoreString.c:131
Header file for general purpose routines.
BIOMOLECULE * blGetBiomoleculeWholePDB(WHOLEPDB *wpdb)
BOOL blGetHeaderWholePDB(WHOLEPDB *wpdb, char *header, int maxheader, char *date, int maxdate, char *pdbcode, int maxcode)
#define CHAINMATCH(chain1, chain2)
Definition: pdb.h:495
#define MAXBUFF
#define FREELIST(y, z)
Definition: macros.h:264
#define blMAXCHAINLABEL
Definition: pdb.h:248
#define INIT(x, y)
Definition: macros.h:244
char molecule[MAXPDBANNOTATION]
Definition: pdb.h:383
#define STRNCPYNOSPACES(out, in, mx)
Definition: macros.h:540
REAL transMatrix[3]
Definition: pdb.h:412
char * string
Definition: general.h:85
char ** blGetPDBChainLabels(PDB *pdb, int *nChains)
char * blGetTitleWholePDB(WHOLEPDB *wpdb)
#define PADCHARMINTERM(s, c, l)
Definition: macros.h:474
char origres[8]
Definition: pdb.h:405
char scientificName[MAXPDBANNOTATION]
Definition: pdb.h:395
void blFindOriginalResType(char *modAA, char *stdAA, MODRES *modres)
#define ALLOCSIZE
char mutation[MAXPDBANNOTATION]
Definition: pdb.h:383
#define CLEAR_BIOMOL(b)
#define MAXWORD