The outage for Sunday 24th November has been cancelled.
Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
secstr.c
Go to the documentation of this file.
1 #define TESTCODE 1
2 /************************************************************************/
3 /**
4 
5  \File secstruc.c
6 
7  \version V1.1
8  \date 10.07.15
9  \brief Secondary structure calculation
10 
11  \copyright (c) Dr. Andrew C. R. Martin, UCL, 1988-2015
12  \author Dr. Andrew C. R. Martin
13  \par
14  Institute of Structural & Molecular Biology,
15  University College London,
16  Gower Street,
17  London.
18  WC1E 6BT.
19  \par
20  andrew@bioinf.org.uk
21  andrew.martin@ucl.ac.uk
22 
23 **************************************************************************
24 
25  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
26  according to the conditions laid out in the accompanying file
27  COPYING.DOC.
28 
29  The code may be modified as required, but any modifications must be
30  documented so that the person responsible can be identified.
31 
32  The code may not be sold commercially or included as part of a
33  commercial product except as described in the file COPYING.DOC.
34 
35 **************************************************************************
36 
37  Description:
38  ============
39 
40 
41 **************************************************************************
42 
43  Usage:
44  ======
45 
46 **************************************************************************
47 
48  Revision History:
49  =================
50 
51  V1.0 19.05.99 Original, written while at Inpharmatica By: ACRM
52  V1.1 10.07.15 Modified for BiopLib
53  17.02.16 xyz for coordinates now count from zero
54  CalcDihedral() also now uses arrays that count from
55  zero
56  18.02.16 NUM_MC_ATOM_TYPES now counts from zero
57  MAX_NUM_ANGLES now counts from zero
58 
59 *************************************************************************/
60 /* Doxygen
61  -------
62  #GROUP Handling PDB Data
63  #SUBGROUP Analyzing structures
64  #FUNCTION blCalcSecStrucPDB()
65  Calculate secondary structure populating the secstr field of the PDB
66  structure.
67 */
68 /************************************************************************/
69 /* Includes
70 */
71 #include <stdio.h>
72 #include <string.h>
73 #include <stdlib.h>
74 #include <math.h>
75 
76 #include "pdb.h"
77 #include "array.h"
78 #include "MathType.h"
79 #include "SysDefs.h"
80 #include "macros.h"
81 #include "angle.h"
82 #include "secstr.h"
83 
84 /************************************************************************/
85 /* Defines and macros
86 */
87 #define MAXBUFF 160
88 #define COORD_DIM 3 /* number of coord dimensions: x, y, z */
89 #define NUM_STRAND_CHARS 26 /* number of available strand characters
90  (the 26 letters of the alphabet) */
91 #define MAX_NUM_HBOND 4 /* number of allowed H-bonds */
92 #define MAX_NUM_CHN 200 /* number of allowed 'chains'; the way
93  the code is used, it should be handled
94  an individual chain, but it can also
95  cope with (up to 50) multiple chains
96  and has to allow this to deal with
97  apparent chain breaks resulting from
98  missing residues */
99 
100 /* Used by MakeTurnsAndBridges() */
101 #define NRULES 3
102 #define PARLEL 1
103 
104 #define NUM_BRIDGE 4 /* Max bridge index */
105 #define NUM_STRAND_PAIR 2 /* Paired strands */
106 #define NUM_BRIDGE_PAIR 2 /* Paired strands for bridge */
107 
108 #define NUM_DIHED_DATA 3 /* number of dihedral data points */
109 #define RESTYPE_PROLINE 15 /* residue type number for Proline - an
110  offset into the KnownResidueTypes
111  array */
112 
113 #define NUM_MC_ATOM_TYPES 6 /* number of M/C atom types */
114 #define ATOM_N 0 /* atom indexes */
115 #define ATOM_CA 1
116 #define ATOM_C 2
117 #define ATOM_O 3
118 #define ATOM_H 4
119 #define ATOM_CB 5
120 
121 #define MAX_NUM_ANGLES 7 /* number of angles calculated */
122 #define NUM_STANDARD_DIHEDRALS 4 /* number of standard dihedrals */
123 #define DIHED_PHI 0 /* dihedral indexes */
124 #define DIHED_PSI 1
125 #define DIHED_OMEGA 2
126 #define DIHED_CHIRAL 3
127 #define DIHED_IMPLD 4
128 #define DIHED_KAPPA 5
129 #define DIHED_TCO 6
130 
131 #define NUM_STRUC_SYMS (11) /* number of structure symbols */
132 #define SECSTR_IDX_ALPHAH 0 /* Symbol indexes */
133 #define SECSTR_IDX_SHEET1 1
134 #define SECSTR_IDX_SHEET 2
135 #define SECSTR_IDX_BRIDGE 3
136 #define SECSTR_IDX_BRIDG1 4
137 #define SECSTR_IDX_BRIDG2 5
138 #define SECSTR_IDX_THRTNH 6
139 #define SECSTR_IDX_PIH 7
140 #define SECSTR_IDX_TURN 8
141 #define SECSTR_IDX_BEND 9
142 #define SECSTR_IDX_CHISGN 10
143 
144 #define NUM_HELIX_TYPE 3 /* number of helix types */
145 #define HELIX_CHUNK_SIZE 4 /* Helices are made in chunks of this */
146 #define THREE_TEN_CHUNK_SIZE 3 /* 3_10 helices in chunks of this */
147 #define PIH_CHUNK_SIZE 5 /* Pi helices in chunks of this */
148 
149 #define BRIDGE_SEP 2 /* bridge separation */
150 #define BULGE_SIZE_L 5
151 #define BULGE_SIZE_S 2
152 
153 #define CST 0 /* Offsets to store HBond energies */
154 #define NST 2
155 
156 #define NUM_TURN_TYPE 3 /* number of turn types */
157 
158 #define RADIAN (180.0/3.141592) /* constant to convert RADs to degrees */
159 #define NULLVAL 999.9 /* used as a NULL value */
160 
161 #define MAX_PEPTIDE_BOND 2.5 /* maximum peptide bond length */
162 #define MAX_CA_DISTANCE 5.0 /* maximum CA-CA distance */
163 
164 #define BEND_SIZE 70.0 /* mainchain atoms bent by more than this
165  angle are flagged as "bend" */
166 
167 /* Constants used by MakeHBonds() to create hydrogen bonds */
168 #define MAX_HBOND_ENERGY -0.5
169 #define HBOND_ENERGY_LIMIT -9.9
170 #define HBOND_MAX_CA_DIST 8.0
171 #define HBOND_Q1 0.42
172 #define HBOND_Q2 0.2
173 #define HBOND_F 332
174 
175 /* Macros for approximate equality and inequality */
176 #define ACCURACY 0.0001
177 #define APPROXEQ(x, y) (ABS((x)-(y)) < ACCURACY)
178 #define APPROXNE(x, y) (ABS((x)-(y)) >= ACCURACY)
179 
180 /* Macro to calculate atom distance */
181 #define ATDIST(a, b) \
182  sqrt(((a)[0]-(b)[0])*((a)[0]-(b)[0]) + \
183  ((a)[1]-(b)[1])*((a)[1]-(b)[1]) + \
184  ((a)[2]-(b)[2])*((a)[2]-(b)[2]))
185 
186 /* Return the nearest integer (cast to a REAL) */
187 #define ANINT(x) ((REAL)(int)((x) + (((x) >= 0.0)?0.5:(-0.5))))
188 
189 /* Free memory in blCalcSecStrPDB() */
190 #define FREE_SECSTR_MEMORY \
191  do { \
192  if(gotAtom != NULL) \
193  blFreeArray2D((char **)gotAtom, NUM_MC_ATOM_TYPES, seqlenMalloc); \
194  if(mcCoords != NULL) \
195  blFreeArray3D((char ***)mcCoords, NUM_MC_ATOM_TYPES, seqlenMalloc, \
196  COORD_DIM); \
197  if(hbondEnergy != NULL) \
198  blFreeArray2D((char **)hbondEnergy, seqlenMalloc, MAX_NUM_HBOND); \
199  if(mcAngles != NULL) \
200  blFreeArray2D((char **)mcAngles, MAX_NUM_ANGLES, seqlenMalloc); \
201  if(detailSS != NULL) free(detailSS); \
202  if(finalSS != NULL) free(finalSS); \
203  if(breakSymbol != NULL) free(breakSymbol); \
204  if(ssTable != NULL) \
205  blFreeArray2D((char **)ssTable, NUM_STRUC_SYMS, seqlenMalloc); \
206  if(residueID != NULL) \
207  blFreeArray2D((char **)residueID, seqlenMalloc, 16); \
208  if(residueTypes != NULL) free(residueTypes); \
209  if(hbond != NULL) \
210  blFreeArray2D((char **)hbond, seqlenMalloc, MAX_NUM_HBOND); \
211  if(bridgePoints != NULL) \
212  blFreeArray2D((char **)bridgePoints, NUM_BRIDGE_PAIR, \
213  seqlenMalloc); \
214  } while(0)
215 
216 
217 /************************************************************************/
218 /* Globals
219 */
220 
221 /************************************************************************/
222 /* Prototypes
223 */
224 static void ExtractPDBData(PDB *pdbStart, PDB *pdbStop,
225  REAL ***mcCoords, BOOL **gotAtom,
226  char **seqbcd, BOOL *caOnly, int *seqlen,
227  int *residueTypes, char *KnownResidueIndex);
228 static void SetPDBSecStruc(PDB *pdbStart, PDB *pdbStop, char *finalSS);
229 static int CountResidues(PDB *pdbStart, PDB *pdbStop);
230 static int FindResidueCode(char *resnam, char *KnownResidueIndex);
231 static void MarkBends(char **ssTable, char *detailSS, char *finalSS,
232  int seqlen);
233 static void SetChainEnds(char *breakSymbol, int *chainSize,
234  int *numChains, int *chainEnd,
235  int numberInChain, char **residueID,
236  int resIndex, int seqlen, BOOL verbose);
237 static REAL CalcDihedral(int angnum, REAL *atoma, REAL *atomb,
238  REAL *atomc, REAL *atomd);
239 static int FindHBondOffset(REAL energy, REAL bonde1, REAL bonde2);
240 static void SetHBond(REAL **hbondEnergy, int **hbond, REAL energy,
241  int donorResIndex, int acceptorResIndex,
242  int *bondCount, BOOL verbose);
243 static void FindNextPartialSheet(int startPoint, int sheetNum,
244  char **ssTable, int *sheetCode,
245  int **bridgePoints, BOOL *found,
246  int seqlen);
247 static void FindFirstUnsetSheet(int startPoint, char **ssTable,
248  int *sheetCode, int *startOfSheet,
249  int *endOfSheet, int seqlen);
250 static void MarkHelicesInSummary(char *summ, char helixChar,
251  char altHelixChar, char turnChar,
252  char altTurnChar, int helixSize,
253  int seqlen);
254 static void MarkHelices(char **ssTable, char *detailSS, char *finalSS,
255  int helixPos, int helixSize, char helixCharacter,
256  char altHelixCharacter, int seqlen);
257 static void MarkSheetsAndBridges(int seqlen, char **ssTable,
258  char *detailSS, char *finalSS,
259  char *ssSymbols, char *altSSSymbols);
260 static void RejectHBond(int resIndex, int *bond, REAL *bonde);
261 static void SetSheet(int bridgePoint, int sheetNum, int *sheetCode,
262  char **ssTable, int seqlen);
263 static void LabelSheets(char **ssTable, int **bridgePoints,
264  int *sheetCode, int seqlen, BOOL verbose);
265 static int FindNextCodeOccurrence(int strandStart, int strandEnd,
266  int code, int **strandCode);
267 static void MarkTurns(char **ssTable, char *detailSS, char *finalSS,
268  char turnChar, char altTurnChar, int seqlen);
269 static void FindNextStrand(int **strandCode, int sheetStart,
270  int sheetEnd, int *strandCount,
271  int *startIndex, int lastStrand,
272  int *bestvalue);
273 static void FindChainBreaks(REAL ***mcCoords, BOOL **gotAtom,
274  char **residueID, char *breakSymbol,
275  int *chainSize, int *numChains, int *chainEnd,
276  BOOL caOnly, int seqlen, BOOL verbose);
277 static void AddHydrogens(REAL ***mcCoords, BOOL **gotAtom, int *chainSize,
278  int numChains, BOOL verbose);
279 static void MakeHBonds(REAL ***mcCoords, BOOL **gotAtom, int **hbond,
280  REAL **hbondEnergy, int *residueTypes,
281  int *chainEnd, int seqlen, BOOL verbose);
282 static void CalcMCAngles(REAL ***mcCoords, REAL **mcAngles,
283  BOOL **gotAtom, int *chainSize, int numChains,
284  BOOL caOnly, int seqlen);
285 static BOOL MakeTurnsAndBridges(int **hbond, char **ssTable,
286  REAL **mcAngles, int **bridgePoints,
287  int *chainEnd, int seqlen,
288  BOOL verbose);
289 static void MakeSummary(char **ssTable, char *detailSS, char *finalSS,
290  int seqlen);
291 static void SetBendResidues(REAL **mcAngles, char **ssTable, int seqlen);
292 
293 
294 /************************************************************************/
295 /*>int blCalcSecStrucPDB(PDB *pdbStart, PDB *pdbStop, BOOL verbose)
296  -----------------------------------------------------------------
297 *//**
298  \param[in] *pdbStart Start of PDB linked list
299  \param[in] *pdbStop End of PDB linked list (NULL or pointer
300  to start of next chain)
301  \param[in] verbose Provide informational messages
302  \return 0 - success
303  -ve - error (See SECSTR_ERR_xxxxx)
304 
305  Calculate secondary structure populating the ss field of the PDB
306  structure.
307 
308 - 19.05.99 Original By: ACRM
309 - 27.05.99 Standard format for messages
310 - 09.06.99 Warning only issued if not quiet
311 - 10.07.15 Modified for BiopLib
312 - 09.03.16 Zero-basing
313 - 10.08.16 Completed zero-basing
314 */
315 int blCalcSecStrucPDB(PDB *pdbStart, PDB *pdbStop, BOOL verbose)
316 {
317  char **ssTable = NULL,
318  *detailSS = NULL,
319  *finalSS = NULL,
320  *breakSymbol = NULL, /* Chain break symbols */
321  **residueID = NULL; /* Residue identifiers (including chain
322  ID and insert code) */
323  int seqlen,
324  numChains,
325  resCount,
326  seqlenMalloc,
327  chainSize[MAX_NUM_CHN], /* number of residues in chain */
328  chainEnd[MAX_NUM_CHN],
329  retval = SECSTR_ERR_NOERR,
330  **hbond = NULL,
331  **bridgePoints = NULL,
332  *residueTypes = NULL; /* amino acid sequence stored as offsets
333  into the KnownResidueIndex[] array */
334 
335  REAL ***mcCoords = NULL, /* Array of mainchain coords */
336  **hbondEnergy = NULL,
337  **mcAngles = NULL;
338 
339  BOOL **gotAtom = NULL, /* Flags for mainchain atoms found */
340  caOnly;
341 
342  static char KnownResidueIndex[] = "ALAASXCYSASPGLUPHEGLYHISILEXXX\
343 LYSLEUMETASNXXXPROGLNARGSERTHRXXXVALTRPXXXTYRGLXUNKPCAINI";
344 
345  seqlenMalloc = CountResidues(pdbStart, pdbStop);
346 
347  /* Allocate memory for arrays */
348  detailSS = (char *)malloc(sizeof(char) * seqlenMalloc);
349  finalSS = (char *)malloc(sizeof(char) * seqlenMalloc);
350  breakSymbol = (char *)malloc(sizeof(char) * seqlenMalloc);
351  residueTypes = (int *)malloc(sizeof(int) * seqlenMalloc);
352 
353  gotAtom = (BOOL **)blArray2D(sizeof(BOOL), NUM_MC_ATOM_TYPES,
354  seqlenMalloc);
355  hbondEnergy = (REAL **)blArray2D(sizeof(REAL), seqlenMalloc,
356  MAX_NUM_HBOND);
357  mcAngles = (REAL **)blArray2D(sizeof(REAL), MAX_NUM_ANGLES,
358  seqlenMalloc);
359  ssTable = (char **)blArray2D(sizeof(char), NUM_STRUC_SYMS,
360  seqlenMalloc);
361  residueID = (char **)blArray2D(sizeof(char), seqlenMalloc,
362  16);
363  hbond = (int **)blArray2D(sizeof(int), seqlenMalloc,
364  MAX_NUM_HBOND);
365  bridgePoints = (int **)blArray2D(sizeof(int), NUM_BRIDGE_PAIR,
366  seqlenMalloc);
367  mcCoords = (REAL ***)blArray3D(sizeof(REAL), NUM_MC_ATOM_TYPES,
368  seqlenMalloc, COORD_DIM);
369 
370  /* Check all allocations succeeded */
371  if((detailSS != NULL) &&
372  (finalSS != NULL) &&
373  (breakSymbol != NULL) &&
374  (residueTypes != NULL) &&
375  (gotAtom != NULL) &&
376  (hbondEnergy != NULL) &&
377  (mcAngles != NULL) &&
378  (ssTable != NULL) &&
379  (residueID != NULL) &&
380  (hbond != NULL) &&
381  (bridgePoints != NULL) &&
382  (mcCoords != NULL))
383  {
384  /* Extract the required data from the PDB linked list */
385  ExtractPDBData(pdbStart, pdbStop, mcCoords, gotAtom, residueID,
386  &caOnly, &seqlen, residueTypes, KnownResidueIndex);
387 
388  if(caOnly) /* Secondary structure undefined - just insert '?' */
389  {
390  for(resCount=0; resCount<seqlen; resCount++)
391  {
392  finalSS[resCount] = '?';
393  }
394 
395  if(verbose)
396  {
397  fprintf(stderr,"Sec Struc: (warning) protein chain %c is \
398 CA-only. Secondary structure undefined.\n", pdbStart->chain[0]);
399  }
400  }
401  else
402  {
403  /* Sets breakSymbol[], chainSize[], numChains, chainEnd */
404  FindChainBreaks(mcCoords, gotAtom, residueID, breakSymbol,
405  chainSize, &numChains, chainEnd, caOnly, seqlen,
406  verbose);
407 
408  AddHydrogens(mcCoords, gotAtom, chainSize, numChains, verbose);
409 
410  /* Sets hbond, hbondEnergy */
411  MakeHBonds(mcCoords, gotAtom, hbond, hbondEnergy, residueTypes,
412  chainEnd, seqlen, verbose);
413 
414  /* Sets mcAngles[] */
415  CalcMCAngles(mcCoords, mcAngles, gotAtom, chainSize, numChains,
416  caOnly, seqlen);
417 
418  /* Sets ssTable[][], bridgePoints[][] */
419  if(!MakeTurnsAndBridges(hbond, ssTable, mcAngles, bridgePoints,
420  chainEnd, seqlen, verbose))
421  {
423  return(SECSTR_ERR_NOMEM);
424  }
425 
426  /* Updates ssTable[][] */
427  SetBendResidues(mcAngles, ssTable, seqlen);
428 
429  /* Sets detailSS[], finalSS[] */
430  MakeSummary(ssTable, detailSS, finalSS, seqlen);
431  }
432 
433  /* Set the results back into the PDB linked list */
434  SetPDBSecStruc(pdbStart, pdbStop, finalSS);
435  }
436  else
437  {
438  retval = SECSTR_ERR_NOMEM;
439  }
440 
442  return(retval);
443 }
444 
445 
446 /************************************************************************/
447 /*>static void SetPDBSecStruc(PDB *pdbStart, PDB *pdbStop, char *finalSS)
448  ----------------------------------------------------------------------
449 *//**
450  \param[in,out] *pdbStart Start of PDB linked list
451  \param[in] *pdbStop End of PDB linked list
452  \param[in] *finalSS Array of secondary structure assignments
453 
454  Copies secondary structure assignment data into the PDB linked list.
455  If no backbone nitrogen is present, then the secondary structure field
456  is set to a '?'.
457 
458 - 19.05.99 Original By: ACRM
459 - 13.07.15 Modified for BiopLib
460 - 09.03.16 Completed zero-basing
461 */
462 static void SetPDBSecStruc(PDB *pdbStart, PDB *pdbStop, char *finalSS)
463 {
464  PDB *p, *r,
465  *nextRes = NULL;
466  int i = 0;
467  char ssChar;
468  BOOL gotN;
469 
470  for(p=pdbStart; p!=pdbStop; p=nextRes)
471  {
472  nextRes = blFindNextResidue(p);
473 
474  /* Check we have a backbone N */
475  gotN = FALSE;
476  for(r=p; r!=nextRes; NEXT(r))
477  {
478  if(!strncmp(r->atnam, "N ", 4))
479  {
480  gotN = TRUE;
481  break;
482  }
483  }
484 
485  /* Extract the secondary structure value or use '?' if the backbone
486  N was not found
487  */
488  ssChar = ((gotN) ? finalSS[i++] : '?');
489 
490  /* Set the values into the PDB linked list */
491  for(r=p; r!=nextRes; NEXT(r))
492  {
493  r->secstr = ssChar;
494  }
495  }
496 }
497 
498 
499 /************************************************************************/
500 /*>static int CountResidues(PDB *pdbStart, PDB *pdbStop)
501  -----------------------------------------------------
502 *//**
503  \param[in] *pdbStart Start of PDB linked list
504  \param[in] *pdbStop End of PDB linked list
505  \return Number of residues
506 
507  Counts how many residue are present in the PDB linked list
508 
509 - 10.07.15 Original By: ACRM
510 - 13.07.15 Modified for BiopLib
511 - 09.03.16 Completed zero-basing
512 */
513 static int CountResidues(PDB *pdbStart, PDB *pdbStop)
514 {
515  int resCount;
516  PDB *p, *nextRes;
517 
518  resCount = 0;
519 
520  for(p=pdbStart; p!=pdbStop; p = nextRes)
521  {
522  resCount++;
523  nextRes = blFindNextResidue(p);
524  }
525 
526  return(resCount);
527 }
528 
529 
530 /************************************************************************/
531 /*>static int FindResidueCode(char *resnam, char *KnownResidueIndex)
532  ------------------------------------------------------------------
533 *//**
534  \param[in] *resnam Residue name
535  \param[in] *KnownResidueIndex Array of known residue names
536  \return Numeric encoding of the residue
537 
538  Steps through the KnownResidueIndex array to find a numeric encoding
539  for a given amino acid type
540 
541 - 19.05.99 Original By: ACRM
542 - 13.07.15 Modified for BiopLib
543 - 09.03.16 Completed zero-basing
544 */
545 static int FindResidueCode(char *resnam, char *KnownResidueIndex)
546 {
547  int i, codLen;
548 
549  codLen = strlen(KnownResidueIndex);
550 
551  for(i=0; i<codLen; i+=3)
552  {
553  if(!strncmp(resnam, KnownResidueIndex+i, 3))
554  return((int)(i/3));
555  }
556 
557  return(-1);
558 }
559 
560 
561 /************************************************************************/
562 /*>static void ExtractPDBData(PDB *pdbStart, PDB *pdbStop,
563  REAL ***mcCoords, BOOL **gotAtom,
564  char **residueID, BOOL *caOnly,
565  int *seqlen, int *residueTypes,
566  char *KnownResidueIndex)
567  -------------------------------------------------------------------
568 *//**
569  \param[in] *pdbStart Start of PDB linked list
570  \param[in] *pdbStop End of PDB linked list
571  \param[out] ***mcCoords Mainchain coordinates
572  \param[out] **gotAtom Flags to indicate whether each backbone
573  atom type is present
574  \param[out] **residueID Residue labels - used for error messages
575  \param[out] *caOnly Is the chain CA only?
576  \param[out] *seqlen How many residues
577  \param[out] *residueTypes amino acid sequence stored as offsets
578  into the KnownResidueIndex[] array
579  \param[in] *KnownResidueIndex Index array of residue types for
580  numeric encoding
581 
582  Extracts the required data from the PDB linked list
583 
584 - 19.05.99 Original By: ACRM
585 - 13.07.15 Modified for BiopLib
586 - 17.02.16 x,y,z for coordinates now count from 0 instead of 1
587 - 09.03.16 Completed zero-basing
588 */
589 static void ExtractPDBData(PDB *pdbStart, PDB *pdbStop,
590  REAL ***mcCoords, BOOL **gotAtom,
591  char **residueID, BOOL *caOnly,
592  int *seqlen, int *residueTypes,
593  char *KnownResidueIndex)
594 {
595  char buffer[MAXBUFF],
596  prevInsert[8];
597  int i,
598  prevResnum = (-99999999),
599  atomIdx,
600  caCount = 0,
601  nCount = 0,
602  resCount = (-1);
603  PDB *p;
604 
605  strncpy(prevInsert, "-", 8);
606 
607  /* Assume not C-alpha only - we check the N and CA counts and reset
608  this later if there were too few Ns for the number of CAs
609  */
610  *caOnly = FALSE;
611 
612  for(p=pdbStart; p!=pdbStop; NEXT(p))
613  {
614  /* If it's the start of a new residue, record the residue level
615  information
616  */
617  if((p->resnum != prevResnum) || !INSERTMATCH(p->insert, prevInsert))
618  {
619  prevResnum = p->resnum;
620  strcpy(prevInsert, p->insert);
621  resCount++;
622 
623  for(i=0; i<NUM_MC_ATOM_TYPES; i++)
624  {
625  gotAtom[i][resCount] = FALSE;
626  }
627 
628  sprintf(buffer,"%s.%d%s", p->chain, p->resnum, p->insert);
629  strncpy(residueID[resCount], buffer, 16);
630  residueTypes[resCount] = FindResidueCode(p->resnam,
631  KnownResidueIndex);
632  }
633 
634  /* See if it's backbone/C-beta */
635  if(!strncmp(p->atnam, "N ",4))
636  {
637  atomIdx = ATOM_N;
638  nCount++;
639  }
640  else if(!strncmp(p->atnam, "CA ",4))
641  {
642  atomIdx = ATOM_CA;
643  caCount++;
644  }
645  else if(!strncmp(p->atnam, "C ",4))
646  {
647  atomIdx = ATOM_C;
648  }
649  else if(!strncmp(p->atnam, "O ",4))
650  {
651  atomIdx = ATOM_O;
652  }
653  else if(!strncmp(p->atnam, "CB ",4))
654  {
655  atomIdx = ATOM_CB;
656  }
657  else if(!strncmp(p->atnam, "H ",4))
658  {
659  atomIdx = ATOM_H;
660  }
661  else
662  {
663  atomIdx = -1;
664  }
665 
666  /* If it was, then we store it */
667  if(atomIdx != (-1))
668  {
669  gotAtom[atomIdx][resCount] = TRUE;
670  mcCoords[atomIdx][resCount][0] = p->x;
671  mcCoords[atomIdx][resCount][1] = p->y;
672  mcCoords[atomIdx][resCount][2] = p->z;
673  }
674  }
675 
676  if(nCount < caCount/2)
677  {
678  *caOnly = TRUE;
679  }
680 
681  *seqlen = resCount+1;
682 }
683 
684 
685 /************************************************************************/
686 /*>static void MarkBends(char **ssTable, char *detailSS, char *finalSS,
687  int seqlen)
688  --------------------------------------------------------------------
689 *//**
690  \param[in] ssTable The sec struc table [struc symbol][res]
691  \param[out] detailSS detailed ss assignment array
692  \param[out] finalSS final ss assignment array
693  \param[in] seqlen sequence length
694 
695  Enter bend symbols into summary information arrays
696 
697 - 19.05.99 Original By: ACRM
698 - 13.07.15 Modified for BiopLib
699 - 09.03.16 Completed zero-basing
700 */
701 static void MarkBends(char **ssTable, char *detailSS, char *finalSS,
702  int seqlen)
703 {
704  int resCount;
705 
706  for(resCount=0; resCount<seqlen; resCount++)
707  {
708  if(ssTable[SECSTR_IDX_BEND][resCount] != SECSTR_COIL)
709  {
710  if(detailSS[resCount] == SECSTR_COIL)
711  detailSS[resCount] = ssTable[SECSTR_IDX_BEND][resCount];
712 
713  if(finalSS[resCount] == SECSTR_COIL)
714  finalSS[resCount] = ssTable[SECSTR_IDX_BEND][resCount];
715  }
716  }
717 }
718 
719 
720 /************************************************************************/
721 /*>static void FindChainBreaks(REAL ***mcCoords, BOOL **gotAtom,
722  char **residueID, char *breakSymbol,
723  int *chainSize, int *numChains,
724  int *chainEnd, BOOL caOnly, int seqlen,
725  BOOL verbose)
726  ----------------------------------------------------------------------
727 *//**
728  \param[in] ***mcCoords Mainchain coordinates
729  \param[in] **gotAtom Flags to indicate whether each backbone
730  atom type is present
731  \param[in] **residueID Residue ID
732  \param[in] caOnly Input flag
733  \param[in] seqlen Input seq length
734  \param[in] verbose Print messages
735  \param[out] *breakSymbol Array indexed by residue number indicating
736  chain breaks
737  \param[out] *chainSize Array indexed by chain number indicating
738  the number of residues in each chain
739  \param[out] *numChains Chain count
740  \param[out] *chainEnd Array indexed by chain number indicating
741  the end of each chain
742 
743  Look for chain breaks. We introduce these wherever the peptide bond
744  is longer than MAX_PEPTIDE_BOND or either the C or N is missing. If
745  the chain is CA only, we introduce a break wherever the CA distance is
746  greater than MAX_CA_DISTANCE
747 
748  We should be passing in individual chains, but this allows us not to
749  (providing the MAX_NUM_CHN isn't exceeded) and also accounts for
750  pseudo-chain breaks as a result of missing residues.
751 
752  The code initialies the breakSymbol[] array so that all residues are
753  set to coil. Where it identifies chain breaks, it calls SetChainEnds()
754  which updates numChain and chainEnd, and sets the breakSymbol[] either
755  side of the break to SECSTR_BREAK ('!')
756 
757 - 19.05.99 Original By: ACRM
758 - 27.05.99 Standard format for messages
759 - 13.07.15 Modified for BiopLib
760 - 09.03.16 Completed zero-basing
761 */
762 static void FindChainBreaks(REAL ***mcCoords, BOOL **gotAtom,
763  char **residueID, char *breakSymbol,
764  int *chainSize, int *numChains, int *chainEnd,
765  BOOL caOnly, int seqlen, BOOL verbose)
766 {
767  int resIndex,
768  numberInChain;
769 
770  for(resIndex=0; resIndex<seqlen; resIndex++)
771  {
772  breakSymbol[resIndex] = SECSTR_COIL;
773  }
774  numberInChain = 1;
775  *numChains = 1;
776 
777  for(resIndex=1; resIndex<seqlen; resIndex++)
778  {
779  if(caOnly)
780  {
781  if(ATDIST(mcCoords[ATOM_CA][resIndex-1],
782  mcCoords[ATOM_CA][resIndex]) > MAX_CA_DISTANCE)
783  {
784  SetChainEnds(breakSymbol, chainSize, numChains, chainEnd,
785  numberInChain, residueID, resIndex, seqlen,
786  verbose);
787  numberInChain = 1;
788  }
789  else
790  {
791  numberInChain++;
792  }
793  }
794  else
795  {
796  if(gotAtom[ATOM_C][resIndex-1] && gotAtom[ATOM_N][resIndex])
797  {
798  if(ATDIST(mcCoords[ATOM_C][resIndex-1],
799  mcCoords[ATOM_N][resIndex]) > MAX_PEPTIDE_BOND)
800  {
801  SetChainEnds(breakSymbol, chainSize, numChains, chainEnd,
802  numberInChain, residueID, resIndex, seqlen,
803  verbose);
804  numberInChain = 1;
805  }
806  else
807  {
808  numberInChain++;
809  }
810  }
811  else
812  {
813  SetChainEnds(breakSymbol, chainSize, numChains, chainEnd,
814  numberInChain, residueID, resIndex, seqlen,
815  verbose);
816  numberInChain = 1;
817  }
818  }
819  }
820  SetChainEnds(breakSymbol, chainSize, numChains, chainEnd,
821  numberInChain, residueID, seqlen, seqlen, verbose);
822 }
823 
824 
825 /************************************************************************/
826 /*>static void SetChainEnds(char *breakSymbol, int *chainSize,
827  int *numChains, int *chainEnd,
828  int numberInChain, char **residueID,
829  int resIndex, int seqlen, BOOL verbose)
830  -------------------------------------------------------------
831 *//**
832  \param[out] *breakSymbol Array indexed by residue number
833  indicating chain breaks
834  \param[in] *chainSize Array indexed by chain number indicating
835  the number of residues in each chain
836  \param[in,out] *numChains Chain count
837  \param[out] *chainEnd Array indexed by chain number indicating
838  the end of each chain
839  \param[in] numberInChain Number of residues in the current chain
840  \param[in] **residueID Residue labels - used for error messages
841  \param[in] resIndex Current position in the list of residues
842  \param[in] seqlen Total number of residues
843  \param[in] verbose Give error messages
844 
845  Stores the information on where there are chain ends (or breaks in
846  the chain owing to missing residues)
847 
848  Updates numChain and chainEnd, and sets the breakSymbol[] either
849  side of the break to SECSTR_BREAK ('!')
850 
851 - 19.05.99 Original By: ACRM
852 - 27.05.99 Standard format for messages
853 - 13.07.15 Modified for BiopLib
854 */
855 static void SetChainEnds(char *breakSymbol, int *chainSize,
856  int *numChains, int *chainEnd,
857  int numberInChain, char **residueID,
858  int resIndex, int seqlen, BOOL verbose)
859 {
860  chainSize[*numChains-1] = numberInChain;
861 
862  if(*numChains == 1)
863  {
864  chainEnd[*numChains] = numberInChain;
865  }
866  else
867  {
868  chainEnd[*numChains] = chainEnd[*numChains-1] + numberInChain;
869  }
870 
871  if(resIndex < seqlen)
872  {
873  breakSymbol[resIndex-1] = SECSTR_BREAK;
874  breakSymbol[resIndex] = SECSTR_BREAK;
875 
876  if(verbose)
877  {
878  fprintf(stderr,"Sec Struc: (info) Chain break between %d(%s) \
879 and %d(%s)\n",
880  resIndex-1, residueID[resIndex-1], resIndex,
881  residueID[resIndex]);
882  }
883 
884  (*numChains)++;
885  if(*numChains >= MAX_NUM_CHN)
886  {
887  *numChains = MAX_NUM_CHN-1;
888  if(verbose)
889  {
890  fprintf(stderr,"Sec Struc: (warning) Maximum number of \
891 chain-breaks exceeded (MAX_NUM_CHN = %d)\n", MAX_NUM_CHN);
892  }
893  }
894  }
895 }
896 
897 /************************************************************************/
898 /*>static void AddHydrogens(REAL ***mcCoords, BOOL **gotAtom,
899  int *chainSize, int numChains, BOOL verbose)
900  ---------------------------------------------------------------------
901 *//**
902  \param[in,out] ***mcCoords Mainchain coordinates
903  \param[in] **gotAtom Flags to indicate whether each backbone
904  atom type is present
905  \param[in] *chainSize Array indexed by chain number indicating
906  the number of residues in each chain
907  \param[in] numChains Chain count
908  \param[in] verbose Give error messages
909 
910  Create backbone hydrogens. These are placed 1A from the N such that
911  N-H is parallel to the preceding C=0
912 
913 - 19.05.99 Original By: ACRM
914 - 27.05.99 Warning in standard format and checks quiet flag
915 - 13.07.15 Modified for BiopLib
916 - 17.02.16 x,y,z for coordinates now count from 0 instead of 1
917 */
918 static void AddHydrogens(REAL ***mcCoords, BOOL **gotAtom, int *chainSize,
919  int numChains, BOOL verbose)
920 {
921  int resCount,
922  i,
923  chainStart,
924  chainNumber;
925  REAL atvec[COORD_DIM], colen;
926 
927  chainStart = 0;
928  /* For each chain */
929  for(chainNumber=0; chainNumber<numChains; chainNumber++)
930  {
931  for(resCount=chainStart+1;
932  resCount<chainStart+chainSize[chainNumber];
933  resCount++)
934  { /* Check we have the N, C and O */
935  if(gotAtom[ATOM_N][resCount] &&
936  gotAtom[ATOM_C][resCount-1] &&
937  gotAtom[ATOM_O][resCount-1])
938  {
939  colen = 0.0;
940  for(i=0; i<COORD_DIM; i++)
941  {
942  atvec[i] = mcCoords[ATOM_C][resCount-1][i] -
943  mcCoords[ATOM_O][resCount-1][i];
944  colen += atvec[i] * atvec[i];
945  }
946  colen = sqrt(colen);
947 
948  for(i=0; i<COORD_DIM; i++)
949  {
950  atvec[i] /= colen;
951  }
952 
953  for(i=0; i<COORD_DIM; i++)
954  {
955  mcCoords[ATOM_H][resCount][i] =
956  mcCoords[ATOM_N][resCount][i] + atvec[i];
957  }
958 
959  gotAtom[ATOM_H][resCount] = TRUE;
960  }
961  else
962  {
963  if(verbose)
964  {
965  fprintf(stderr,"Sec Struc: (warning) Hydrogen atom not \
966 generated for residue %d\n",
967  resCount+1);
968  }
969  gotAtom[ATOM_H][resCount] = FALSE;
970  }
971  }
972  chainStart += chainSize[chainNumber];
973  }
974 }
975 
976 
977 
978 /************************************************************************/
979 /*>static REAL CalcDihedral(int angnum, REAL *atoma, REAL *atomb,
980  REAL *atomc, REAL *atomd)
981  ---------------------------------------------------------------
982 *//**
983  \param[in] angnum Which angle is being calculating
984  \param[in] *atoma First atom
985  \param[in] *atomb Second atom
986  \param[in] *atomc Third atom
987  \param[in] *atomd Fourth atom
988  \return Dihedral
989 
990  Calculate the dihedral described by 4 atoms. angnum specifies which
991  dihedral is to be calculated (see the DIHED_XXXX defines) as the
992  'special' ones (KAPPA (CA-CA-CA-CA) and TCO (C=O, C=O)) need special
993  treatment.
994 
995 - 19.05.99 Original By: ACRM
996 - 13.07.15 Modified for BiopLib
997 - 17.02.16 x,y,z for coordinates now count from 0 instead of 1
998  dotproduct[][] and dihatm[] now count from 0
999 */
1000 static REAL CalcDihedral(int angnum,
1001  REAL *atoma,
1002  REAL *atomb,
1003  REAL *atomc,
1004  REAL *atomd)
1005 {
1006  REAL *dihatm[NUM_DIHED_DATA],
1007  codist[COORD_DIM][NUM_DIHED_DATA],
1008  atomDistance[NUM_DIHED_DATA],
1009  dotProduct[NUM_DIHED_DATA][NUM_DIHED_DATA],
1010  ang,
1011  cosAngle;
1012  int i, j, k;
1013 
1014  /* It's a standard dihedral, call our normal bioplib routine */
1015  if((angnum <= NUM_STANDARD_DIHEDRALS) || (angnum >= MAX_NUM_ANGLES))
1016  {
1017 /*HERE*/
1018 /*
1019 fprintf(stderr,"%.2f %.2f %.2f\n",
1020  atoma[0], atoma[1], atoma[2]);
1021 fprintf(stderr,"%.2f %.2f %.2f\n",
1022  atomb[0], atomb[1], atomb[2]);
1023 fprintf(stderr,"%.2f %.2f %.2f\n",
1024  atomc[0], atomc[1], atomc[2]);
1025 fprintf(stderr,"%.2f %.2f %.2f\n",
1026  atomd[0], atomd[1], atomd[2]);
1027 */
1028  return(RADIAN * blPhi(atoma[0], atoma[1], atoma[2],
1029  atomb[0], atomb[1], atomb[2],
1030  atomc[0], atomc[1], atomc[2],
1031  atomd[0], atomd[1], atomd[2]));
1032  }
1033 
1034  /* A non-standard dihedral, use the special code */
1035  dihatm[0] = atoma;
1036  dihatm[1] = atomb;
1037  dihatm[2] = atomc;
1038  dihatm[3] = atomd;
1039 
1040  for(i=0; i<NUM_DIHED_DATA; i++)
1041  {
1042  atomDistance[i] = ATDIST(dihatm[i],dihatm[i+1]);
1043  for(j=0; j<COORD_DIM; j++)
1044  {
1045  codist[j][i] = dihatm[i+1][j] - dihatm[i][j];
1046  }
1047  }
1048 
1049  for(i=0; i<NUM_DIHED_DATA; i++)
1050  {
1051  for(j=0; j<NUM_DIHED_DATA; j++)
1052  {
1053  dotProduct[i][j] = 0.0;
1054  }
1055  }
1056 
1057  for(i=0; i<NUM_DIHED_DATA-1; i++)
1058  {
1059  for(j=i+1; j<NUM_DIHED_DATA; j++)
1060  {
1061  for(k=0; k<COORD_DIM; k++)
1062  {
1063  dotProduct[i][j] += (codist[k][i] * codist[k][j]);
1064  }
1065  }
1066  }
1067 
1068  for(i=0; i<NUM_DIHED_DATA-1; i++)
1069  {
1070  for(j=i+1; j<NUM_DIHED_DATA; j++)
1071  {
1072  if(APPROXEQ(atomDistance[i], 0.0) ||
1073  APPROXEQ(atomDistance[j], 0.0))
1074  {
1075  dotProduct[i][j] = 1.0;
1076  }
1077  else
1078  {
1079  dotProduct[i][j] = (dotProduct[i][j] / atomDistance[i]) /
1080  atomDistance[j];
1081  }
1082  }
1083  }
1084 
1085  cosAngle = dotProduct[0][2];
1086 
1087  if(fabs(cosAngle) > 1.0)
1088  {
1089  REAL tmp = cosAngle/fabs(cosAngle);
1090 
1091  cosAngle = ANINT(tmp);
1092  }
1093 
1094  ang = acos(cosAngle) * RADIAN;
1095 
1096  return(ang);
1097 }
1098 
1099 
1100 /************************************************************************/
1101 /*>static int FindHBondOffset(REAL energy, REAL bonde1, REAL bonde2)
1102  -----------------------------------------------------------------
1103 *//**
1104  \param[in] energy Energy of the new HBond
1105  \param[in] bonde1 Energy of 2nd best HBond
1106  \param[in] bonde2 Energy of best HBond
1107  \return Best HBond
1108 
1109  Finds which of three HBond energies is the best. Note that positive
1110  scores are good.
1111 
1112 - 19.05.99 Original By: ACRM
1113 - 13.07.15 Modified for BiopLib
1114 */
1115 static int FindHBondOffset(REAL energy, REAL bonde1, REAL bonde2)
1116 {
1117  if(energy < bonde1)
1118  {
1119  return(1);
1120  }
1121  else if(energy < bonde2)
1122  {
1123  return(2);
1124  }
1125 
1126  return(0);
1127 }
1128 
1129 
1130 /************************************************************************/
1131 /*>static void SetHBond(REAL **hbondEnergy, int **hbond, REAL energy,
1132  int donorResIndex, int acceptorResIndex,
1133  int *bondCount, BOOL verbose)
1134  ------------------------------------------------------------------
1135 *//**
1136  \param[out] **hbondEnergy HBond Energy [resnum][hbondIndex]
1137  Array of energies for the found
1138  HBonds
1139  \param[out] **hbond HBond [resnum][hbondIndex]
1140  Array of the HBonds we've found
1141  \param[in] energy Energy of a new HBond that we want
1142  to store
1143  \param[in] donorResIndex Index of the new donor
1144  \param[in] acceptorResIndex Index of the new acceptor
1145  \param[in,out] *bondCount Number of HBonds
1146  \param[in] verbose Print messages
1147 
1148  We only keep the best 2 HBonds, so find whether this one is better than
1149  any previous ones and slot it in. Throw away the third HBond if we
1150  have too many. Note that positive energies are good.
1151 
1152  Note that we allow 2 hbonds from C=O and 2 from N-H
1153 
1154 - 19.05.99 Original By: ACRM
1155 - 27.05.99 Standard format for messages
1156 - 13.07.15 Modified for BiopLib
1157 */
1158 static void SetHBond(REAL **hbondEnergy,
1159  int **hbond,
1160  REAL energy,
1161  int donorResIndex,
1162  int acceptorResIndex,
1163  int *bondCount,
1164  BOOL verbose)
1165 {
1166  int acceptorOffset,
1167  donorOffset,
1168  rejectOffset;
1169 
1170  donorOffset = FindHBondOffset(energy,
1171  hbondEnergy[donorResIndex][NST],
1172  hbondEnergy[donorResIndex][NST+1]);
1173  acceptorOffset = FindHBondOffset(energy,
1174  hbondEnergy[acceptorResIndex][CST],
1175  hbondEnergy[acceptorResIndex][CST+1]);
1176 
1177  if(acceptorOffset == 0 || donorOffset == 0)
1178  {
1179  if(verbose)
1180  {
1181  fprintf(stderr,"Sec Struc: (info) Third H-bond skipped. \
1182 Donor index: %4d; Acceptor Index: %4d; Energy: %6.2f\n",
1183  donorResIndex+1, acceptorResIndex+1, energy);
1184  }
1185  }
1186  else
1187  {
1188  if(hbond[donorResIndex][NST+1] != 0)
1189  {
1190  if(verbose)
1191  {
1192  fprintf(stderr,"Sec Struc: (info) Third H-bond skipped. \
1193 Donor index: %4d; Acceptor index: %4d; Energy %6.2f\n",
1194  donorResIndex+1,
1195  hbond[donorResIndex][NST+1],
1196  hbondEnergy[donorResIndex][NST+1]);
1197  }
1198  rejectOffset = hbond[donorResIndex][NST+1];
1199  RejectHBond(donorResIndex+1,
1200  &(hbond[rejectOffset-1][CST]),
1201  &(hbondEnergy[rejectOffset-1][CST]));
1202  (*bondCount)--;
1203  }
1204 
1205  if(hbond[acceptorResIndex][CST+1] != 0)
1206  {
1207  if(verbose)
1208  {
1209  fprintf(stderr,"Sec Struc: (info) Third H-bond skipped. \
1210 Donor index: %4d; Acceptor index: %4d; Energy %6.2f\n",
1211  hbond[acceptorResIndex][CST+1],
1212  acceptorResIndex+1,
1213  hbondEnergy[acceptorResIndex][CST+1]);
1214  }
1215  rejectOffset = hbond[acceptorResIndex][CST+1];
1216  RejectHBond(acceptorResIndex+1,
1217  &(hbond[rejectOffset-1][NST]),
1218  &(hbondEnergy[rejectOffset-1][NST]));
1219  (*bondCount)--;
1220  }
1221 
1222  if(acceptorOffset == 1)
1223  {
1224  hbond[acceptorResIndex][CST+1] = hbond[acceptorResIndex][CST];
1225 
1226  hbondEnergy[acceptorResIndex][CST+1] =
1227  hbondEnergy[acceptorResIndex][CST];
1228  }
1229 
1230  if(donorOffset == 1)
1231  {
1232  hbond[donorResIndex][NST+1] = hbond[donorResIndex][NST];
1233 
1234  hbondEnergy[donorResIndex][NST+1] =
1235  hbondEnergy[donorResIndex][NST];
1236  }
1237 
1238  hbond[acceptorResIndex][CST+acceptorOffset-1] =
1239  donorResIndex+1;
1240  hbondEnergy[acceptorResIndex][CST+acceptorOffset-1] =
1241  energy;
1242  hbond[donorResIndex][NST+donorOffset-1] =
1243  acceptorResIndex+1;
1244  hbondEnergy[donorResIndex][NST+donorOffset-1] =
1245  energy;
1246 
1247  (*bondCount)++;
1248  }
1249 }
1250 
1251 
1252 /************************************************************************/
1253 /*>static void FindNextPartialSheet(int startPoint, int sheetNum,
1254  char **ssTable, int *sheetCode,
1255  int **bridgePoints, BOOL *found,
1256  int seqlen)
1257  -------------------------------------------------------------------
1258 *//**
1259  \param[in] startPoint Start of a sheet (index into residues)
1260  \param[in] sheetNum Sheet number
1261  \param[in] **ssTable The sec struc table [struc symbol][res]
1262  \param[out] *sheetCode The sheet number for each residue
1263  \param[in] **bridgePoints Bridges between strands
1264  \param[out] *found Have we found this sheet
1265  \param[in] seqlen Length of the sequence
1266 
1267  Find the next partially created sheet. Set any negative sheet numbers
1268  to fully set and then set the partners to partially set if they are
1269  not already fully set.
1270 
1271 - 19.05.99 Original By: ACRM
1272 - 13.07.15 Modified for BiopLib
1273 */
1274 static void FindNextPartialSheet(int startPoint,
1275  int sheetNum,
1276  char **ssTable,
1277  int *sheetCode,
1278  int **bridgePoints,
1279  BOOL *found,
1280  int seqlen)
1281 {
1282  int resCount,
1283  startOfSheet,
1284  endOfSheet,
1285  i;
1286 
1287  *found = FALSE;
1288  resCount = startPoint - 1;
1289 
1290  while (sheetCode[resCount] != (-sheetNum))
1291  {
1292  resCount++;
1293  if(resCount >= seqlen)
1294  return;
1295  }
1296 
1297  startOfSheet = resCount;
1298  *found = TRUE;
1299 
1300  while(sheetCode[resCount] == (-sheetNum))
1301  {
1302  resCount++;
1303  if(resCount >= seqlen)
1304  break;
1305  }
1306 
1307  endOfSheet = resCount-1;
1308 
1309  for(resCount = startOfSheet; resCount <= endOfSheet; resCount++)
1310  {
1311  sheetCode[resCount] = sheetNum;
1312  for(i=0; i<NUM_BRIDGE_PAIR; i++)
1313  {
1314  if(bridgePoints[i][resCount] != 0)
1315  {
1316  if(sheetCode[bridgePoints[i][resCount]-1] == 0)
1317  {
1318  SetSheet(bridgePoints[i][resCount], sheetNum, sheetCode,
1319  ssTable, seqlen);
1320  }
1321  }
1322  }
1323  }
1324 }
1325 
1326 
1327 /************************************************************************/
1328 /*>static void FindFirstUnsetSheet(int startPoint, char **ssTable,
1329  int *sheetCode, int *startOfSheet,
1330  int *endOfSheet, int seqlen)
1331  ---------------------------------------------------------------
1332 *//**
1333  \param[in] startPoint Start of sheet
1334  \param[in] **ssTable The sec struc table [struc symbol][res]
1335  \param[in] *sheetCode The sheet number for each residue
1336  \param[out] *startOfSheet Start of a strand
1337  \param[out] *endOfSheet End of a strand
1338  \param[in] seqlen Length of the sequence
1339 
1340  Work through the residues from startPoint to find the first unset
1341  sheet
1342 
1343 - 19.05.99 Original By: ACRM
1344 - 13.07.15 Modified for BiopLib
1345 */
1346 static void FindFirstUnsetSheet(int startPoint, char **ssTable,
1347  int *sheetCode, int *startOfSheet,
1348  int *endOfSheet, int seqlen)
1349 {
1350  int resCount;
1351 
1352  *startOfSheet = 0;
1353  resCount = startPoint-1;
1354 
1355  while((ssTable[SECSTR_IDX_SHEET][resCount] == SECSTR_COIL) ||
1356  (sheetCode[resCount] != 0))
1357  {
1358  resCount++;
1359  if(resCount >= seqlen)
1360  return;
1361  }
1362  *startOfSheet = resCount+1;
1363 
1364  do
1365  {
1366  resCount++;
1367  } while((resCount <= seqlen-1) &&
1368  (ssTable[SECSTR_IDX_SHEET][resCount] != SECSTR_COIL));
1369 
1370  *endOfSheet = resCount;
1371 }
1372 
1373 
1374 
1375 /************************************************************************/
1376 /*>static void MarkHelicesInSummary(char *summ, char helixChar,
1377  char altHelixChar, char turnChar,
1378  char altTurnChar, int helixSize,
1379  int seqlen)
1380  -----------------------------------------------------------------------
1381 *//**
1382  \param[in,out] *summ Secondary structure string
1383  \param[in] helixChar Helix character (G or I)
1384  \param[in] altHelixChar Helix character (g or i)
1385  \param[in] turnChar Turn character (T)
1386  \param[in] altTurnChar Turn character (t)
1387  \param[in] helixSize Length of helix
1388  \param[in] seqlen Length of the sequence
1389 
1390  Mark 3_{10} and Pi helix residues in the summary list
1391 
1392 - 19.05.99 Original By: ACRM
1393 - 13.07.15 Modified for BiopLib
1394 */
1395 static void MarkHelicesInSummary(char *summ, char helixChar,
1396  char altHelixChar, char turnChar,
1397  char altTurnChar, int helixSize,
1398  int seqlen)
1399 {
1400  int resCount,
1401  posInHelix,
1402  startPos = 0;
1403 
1404  for(resCount=1; resCount<seqlen; resCount++)
1405  {
1406  if((summ[resCount] == helixChar) ||
1407  (summ[resCount] == altHelixChar))
1408  {
1409  if(startPos == 0)
1410  startPos = resCount;
1411  }
1412  else
1413  {
1414  if(startPos != 0)
1415  {
1416  if((resCount - startPos) < helixSize)
1417  {
1418  for(posInHelix = startPos;
1419  posInHelix <= (resCount - 1);
1420  posInHelix++)
1421  {
1422  if(summ[posInHelix] == helixChar)
1423  {
1424  summ[posInHelix] = turnChar;
1425  }
1426  else
1427  {
1428  summ[posInHelix] = altTurnChar;
1429  }
1430  }
1431  }
1432  startPos = 0;
1433  }
1434  }
1435  }
1436 
1437  if(startPos != 0)
1438  {
1439  if((seqlen - startPos + 1) < helixSize)
1440  {
1441  for(posInHelix=startPos; posInHelix <= seqlen; posInHelix++)
1442  {
1443  if(summ[resCount] == helixChar)
1444  {
1445  summ[posInHelix] = turnChar;
1446  }
1447  else
1448  {
1449  summ[posInHelix] = altTurnChar;
1450  }
1451  }
1452  }
1453  }
1454 }
1455 
1456 
1457 /************************************************************************/
1458 /*>static void MarkHelices(char **ssTable, char *detailSS, char *finalSS,
1459  int helixPos, int helixSize,
1460  char helixCharacter, char altHelixCharacter,
1461  int seqlen)
1462  ---------------------------------------------------------------------
1463 *//**
1464  \param[in] **ssTable The sec struc table [struc symbol][res]
1465  \param[out] *detailSS detailed ss assignment array
1466  \param[out] *finalSS final ss assignment array
1467  \param[in] helixPos start of a helix
1468  \param[in] helixSize length of a helix
1469  \param[in] helixCharacter helix character (H)
1470  \param[in] altHelixCharacter alt helix character (h)
1471  \param[in] seqlen Sequence length
1472 
1473  Examine the secondary structure bonding table to set any alpha
1474  helices in the summary secondary structure arrays
1475 
1476 - 19.05.99 Original By: ACRM
1477 - 13.07.15 Modified for BiopLib
1478 */
1479 static void MarkHelices(char **ssTable, char *detailSS, char *finalSS,
1480  int helixPos, int helixSize, char helixCharacter,
1481  char altHelixCharacter, int seqlen)
1482 {
1483  int resCount,
1484  posInHelix;
1485 
1486  for(resCount=1; resCount < (seqlen - helixSize); resCount++)
1487  {
1488  if(ssTable[helixPos][resCount] == SECSTR_BEND_START ||
1489  ssTable[helixPos][resCount] == SECSTR_BEND_BOTH)
1490  {
1491  if(ssTable[helixPos][resCount-1] == SECSTR_BEND_START ||
1492  ssTable[helixPos][resCount-1] == SECSTR_BEND_BOTH)
1493  {
1494  for(posInHelix = 0; posInHelix < helixSize; posInHelix++)
1495  {
1496  if(detailSS[resCount+posInHelix] == SECSTR_COIL)
1497  {
1498  detailSS[resCount+posInHelix] = helixCharacter;
1499  }
1500 
1501  if(finalSS[resCount+posInHelix] == SECSTR_COIL ||
1502  finalSS[resCount+posInHelix] == altHelixCharacter)
1503  {
1504  finalSS[resCount+posInHelix] = helixCharacter;
1505  }
1506 
1507  }
1508 
1509  if(finalSS[resCount-1] == SECSTR_COIL)
1510  {
1511  finalSS[resCount-1] = altHelixCharacter;
1512  }
1513 
1514  if(finalSS[resCount+helixSize] == SECSTR_COIL)
1515  {
1516  finalSS[resCount+helixSize] = altHelixCharacter;
1517  }
1518  }
1519  }
1520  }
1521 }
1522 
1523 
1524 /************************************************************************/
1525 /*>static void CalcMCAngles(REAL ***mcCoords, REAL **mcAngles,
1526  BOOL **gotAtom, int *chainSize, int numChains,
1527  BOOL caOnly, int seqlen)
1528  -----------------------------------------------------------------------
1529 *//**
1530  \param[in] ***mcCoords Mainchain coordinates
1531  \param[out] **mcAngles Mainchain torsion angles
1532  \param[in] **gotAtom Flags to indicate whether each backbone
1533  atom type is present
1534  \param[in] *chainSize Array of chain (segment) sizes
1535  \param[in] numChains Number of chains (segments)
1536  \param[in] caOnly Sequence is C-alpha only
1537  \param[in] seqlen Sequence length
1538 
1539  Calculate main chain dihedral angles if all the atoms are present
1540 
1541 - 19.05.99 Original By: ACRM
1542 - 13.07.15 Modified for BiopLib
1543 */
1544 static void CalcMCAngles(REAL ***mcCoords, REAL **mcAngles,
1545  BOOL **gotAtom, int *chainSize, int numChains,
1546  BOOL caOnly, int seqlen)
1547 {
1548  static int angleIndices[MAX_NUM_ANGLES][3] =
1549  {
1550  {2, 0, DIHED_PHI},
1551  {1, -1, DIHED_PSI},
1552  {1, -1, DIHED_OMEGA},
1553  {2, -2, DIHED_CHIRAL},
1554  {1, 0, DIHED_IMPLD},
1555  {3, -2, DIHED_KAPPA},
1556  {2, 0, DIHED_TCO}
1557  };
1558  static int angleAtoms[MAX_NUM_ANGLES][4] =
1559  {
1560  {ATOM_C, ATOM_N, ATOM_CA, ATOM_C},
1561  {ATOM_N, ATOM_CA, ATOM_C, ATOM_N},
1562  {ATOM_CA, ATOM_C, ATOM_N, ATOM_CA},
1563  {ATOM_CA, ATOM_CA, ATOM_CA, ATOM_CA},
1564  {ATOM_CA, ATOM_N, ATOM_C, ATOM_CB},
1565  {ATOM_CA, ATOM_CA, ATOM_CA, ATOM_CA},
1566  {ATOM_C, ATOM_O, ATOM_C, ATOM_O}
1567  };
1568  static int angleOffsets[MAX_NUM_ANGLES][4] =
1569  {
1570  {-2, -1, -1, -1},
1571  {-1, -1, -1, 0},
1572  {-1, -1, 0, 0},
1573  {-2, -1, 0, 1},
1574  {-1, -1, -1, -1},
1575  {-3, -1, -1, 1},
1576  {-1, -1, -2, -2}
1577  };
1578  static int numAtomsRequired[MAX_NUM_ANGLES] =
1579  {2, 2, 2, 4, 1, 5, 2};
1580 
1581  int resCount,
1582  angleIndex,
1583  chainStart,
1584  chainCount,
1585  firstAngle,
1586  finalAngle;
1587 
1588  firstAngle = 0;
1589  finalAngle = MAX_NUM_ANGLES-1;
1590 
1591  if(caOnly)
1592  {
1593  firstAngle = DIHED_CHIRAL;
1594  finalAngle = DIHED_KAPPA;
1595  }
1596 
1597  for(angleIndex=firstAngle; angleIndex<=finalAngle; angleIndex++)
1598  {
1599  chainStart = 0;
1600 
1601  for(chainCount=0; chainCount<numChains; chainCount++)
1602  {
1603  /* If too few residues in chain, create extra NULL coordinates */
1604  if(chainSize[chainCount] < numAtomsRequired[angleIndex])
1605  {
1606  for(resCount = chainStart;
1607  resCount < (chainStart + chainSize[chainCount]);
1608  resCount++)
1609  {
1610  mcAngles[angleIndices[angleIndex][2]][resCount] = NULLVAL;
1611  }
1612  }
1613  else
1614  {
1615  for(resCount = (chainStart + angleIndices[angleIndex][0]);
1616  resCount <= (chainStart + chainSize[chainCount] +
1617  angleIndices[angleIndex][1]);
1618  resCount++)
1619  {
1620  if(gotAtom[angleAtoms[angleIndex][0]]
1621  [resCount+angleOffsets[angleIndex][0]] &&
1622  gotAtom[angleAtoms[angleIndex][1]]
1623  [resCount+angleOffsets[angleIndex][1]] &&
1624  gotAtom[angleAtoms[angleIndex][2]]
1625  [resCount+angleOffsets[angleIndex][2]] &&
1626  gotAtom[angleAtoms[angleIndex][3]]
1627  [resCount+angleOffsets[angleIndex][3]])
1628  {
1629 
1630 
1631 
1632  mcAngles[angleIndices[angleIndex][2]][resCount-1] =
1633  CalcDihedral(angleIndex,
1634  mcCoords[angleAtoms[angleIndex][0]]
1635  [resCount+
1636  angleOffsets[angleIndex][0]],
1637  mcCoords[angleAtoms[angleIndex][1]]
1638  [resCount+
1639  angleOffsets[angleIndex][1]],
1640  mcCoords[angleAtoms[angleIndex][2]]
1641  [resCount+
1642  angleOffsets[angleIndex][2]],
1643  mcCoords[angleAtoms[angleIndex][3]]
1644  [resCount+
1645  angleOffsets[angleIndex][3]])
1646  ;
1647  }
1648  else
1649  {
1650  mcAngles[angleIndices[angleIndex][2]][resCount-1] =
1651  NULLVAL;
1652  }
1653  }
1654 
1655  for(resCount = chainStart;
1656  resCount < chainStart + angleIndices[angleIndex][0] - 1;
1657  resCount++)
1658  {
1659  mcAngles[angleIndices[angleIndex][2]][resCount] = NULLVAL;
1660  }
1661 
1662  for(resCount = chainStart +
1663  chainSize[chainCount] +
1664  angleIndices[angleIndex][1];
1665  resCount < (chainStart + chainSize[chainCount]);
1666  resCount++)
1667  {
1668  mcAngles[angleIndices[angleIndex][2]][resCount] =
1669  NULLVAL;
1670  }
1671  }
1672  chainStart += chainSize[chainCount];
1673  }
1674  }
1675 }
1676 
1677 
1678 /************************************************************************/
1679 /*>static void SetBendResidues(REAL **mcAngles, char **ssTable,
1680  int seqlen)
1681  ------------------------------------------------------------
1682 *//**
1683  \param[in] **mcAngles Mainchain torsion angles
1684  \param[out] **ssTable The sec struc table [struc symbol][res]
1685  \param[in] seqlen The sequence length
1686 
1687  Find any residues where the mainchain is bent by more than BEND_SIZE
1688 
1689 - 19.05.99 Original By: ACRM
1690 - 13.07.15 Modified for BiopLib
1691 */
1692 static void SetBendResidues(REAL **mcAngles, char **ssTable, int seqlen)
1693 {
1694  int resCount;
1695 
1696  for(resCount=0; resCount<seqlen; resCount++)
1697  {
1698  if((mcAngles[DIHED_KAPPA][resCount] > BEND_SIZE) &&
1699  APPROXNE(mcAngles[DIHED_KAPPA][resCount], NULLVAL))
1700  {
1701  ssTable[SECSTR_IDX_BEND][resCount] = SECSTR_BEND;
1702  }
1703  }
1704 }
1705 
1706 
1707 /************************************************************************/
1708 /*>static void MakeHBonds(REAL ***mcCoords, BOOL **gotAtom, int **hbond,
1709  REAL **hbondEnergy, int *residueTypes,
1710  int *chainEnd, int seqlen, BOOL verbose)
1711  ---------------------------------------------------------------------
1712 *//**
1713  \param[in] ***mcCoords Mainchain coordinates
1714  \param[in] **gotAtom Flags to indicate whether each backbone
1715  atom type is present
1716  \param[out] **hbond HBond [resnum][hbondIndex]
1717  Array of the HBonds we've found
1718  \param[out] **hbondEnergy HBond Energy [resnum][hbondIndex]
1719  Array of energies for the found
1720  HBonds
1721  \param[in] *residueTypes amino acid sequence stored as offsets
1722  into the KnownResidueIndex[] array
1723  \param[in] *chainEnd Array indexed by chain number indicating
1724  the end of each chain
1725  \param[in] seqlen Sequence length
1726  \param[in] verbose Print messages
1727 
1728  Identify mainchain hydrogen bonds. We allow each residue to make 2
1729  HBonds from C=O and 2 from N-H. We use the Kabsch and Sander energy
1730  formula to define the presence of an HBond which corresponds to a
1731  maximum distance of 5.2A for linearly arranged atoms with a maximum
1732  angle of 63 degrees:
1733 
1734  / \
1735  | 1 1 1 1 |
1736  E = q * q * f * | --- + --- - --- - --- |
1737  1 2 | d d d d |
1738  \ ON CH OH CN /
1739 
1740  The calculations are optimised by making a distance check and residues
1741  must have at least 1 intervening residue. We also skip prolines as
1742  donors!
1743 
1744 - 19.05.99 Original By: ACRM
1745 - 27.05.99 Standard format for messages
1746 - 13.07.15 Modified for BiopLib
1747 */
1748 static void MakeHBonds(REAL ***mcCoords, BOOL **gotAtom, int **hbond,
1749  REAL **hbondEnergy, int *residueTypes,
1750  int *chainEnd, int seqlen, BOOL verbose)
1751 {
1752  int otherRes,
1753  i,
1754  nbonds,
1755  firstChain,
1756  otherChain,
1757  resCount;
1758  REAL distON,
1759  distOH,
1760  distCH,
1761  distCN,
1762  energy,
1763  caDist;
1764 
1765 
1766  for(resCount=0; resCount<seqlen; resCount++)
1767  {
1768  for(i=0; i<MAX_NUM_HBOND; i++)
1769  {
1770  hbond[resCount][i] = 0;
1771  hbondEnergy[resCount][i] = 0.0;
1772  }
1773  }
1774  nbonds = 0;
1775  firstChain = 1;
1776 
1777  for(resCount=0; resCount<seqlen; resCount++)
1778  {
1779  if(resCount > chainEnd[firstChain])
1780  firstChain++;
1781 
1782  /* Check both N and H are present */
1783  if(gotAtom[ATOM_N][resCount] &&
1784  gotAtom[ATOM_H][resCount] &&
1785  residueTypes[resCount] != RESTYPE_PROLINE)
1786  {
1787  otherChain = 1;
1788  for(otherRes=0; otherRes<seqlen; otherRes++)
1789  {
1790  if(otherRes >= chainEnd[otherChain])
1791  otherChain++;
1792 
1793  if(((abs(resCount - otherRes) == 1) &&
1794  (firstChain != otherChain)) ||
1795  abs(resCount - otherRes) >= 2)
1796  {
1797  if(gotAtom[ATOM_CA][resCount] &&
1798  gotAtom[ATOM_CA][otherRes])
1799  {
1800  caDist = ATDIST(mcCoords[ATOM_CA][otherRes],
1801  mcCoords[ATOM_CA][resCount]);
1802  if(caDist < HBOND_MAX_CA_DIST)
1803  {
1804  if(gotAtom[ATOM_C][otherRes] &&
1805  gotAtom[ATOM_O][otherRes])
1806  {
1807  distON = ATDIST(mcCoords[ATOM_O][otherRes],
1808  mcCoords[ATOM_N][resCount]);
1809  distOH = ATDIST(mcCoords[ATOM_O][otherRes],
1810  mcCoords[ATOM_H][resCount]);
1811  distCH = ATDIST(mcCoords[ATOM_C][otherRes],
1812  mcCoords[ATOM_H][resCount]);
1813  distCN = ATDIST(mcCoords[ATOM_C][otherRes],
1814  mcCoords[ATOM_N][resCount]);
1815 
1816  if(APPROXEQ(distON,0.0) ||
1817  APPROXEQ(distOH,0.0) ||
1818  APPROXEQ(distCH,0.0) ||
1819  APPROXEQ(distCN,0.0))
1820  {
1821  if(verbose)
1822  {
1823  fprintf(stderr,"Sec Struc: (warning) \
1824 Coincident atoms in hydrogen bonding, donor %4d acceptor %4d\n",
1825  resCount+1, otherRes+1);
1826  }
1827  }
1828  else
1829  {
1830  energy = HBOND_Q1 * HBOND_Q2 * HBOND_F *
1831  (1.0/distON +
1832  1.0/distCH -
1833  1.0/distOH -
1834  1.0/distCN);
1835 
1836  if(energy < HBOND_ENERGY_LIMIT)
1837  {
1838  if(verbose)
1839  {
1840  fprintf(stderr,"Sec Struc: (warning) \
1841 Atom indices %d and %d too close O-N: %8.3f C-H: %8.3f O-H: %8.3f \
1842 C-N: %8.3f\n",
1843  resCount+1, otherRes+1,
1844  distON, distCH, distOH, distCN);
1845  }
1846  energy = HBOND_ENERGY_LIMIT;
1847  }
1848 
1849  if(energy < MAX_HBOND_ENERGY)
1850  {
1851  SetHBond(hbondEnergy, hbond, energy,
1852  resCount, otherRes, &nbonds,
1853  verbose);
1854  }
1855  }
1856  }
1857  }
1858  }
1859  }
1860  }
1861  }
1862  }
1863 
1864  if(verbose)
1865  {
1866  fprintf(stderr,"Sec Struc: (info) Total Number of H-bonds: %5d\n",
1867  nbonds);
1868  }
1869 }
1870 
1871 
1872 /************************************************************************/
1873 /*>static void MakeSummary(char **ssTable, char *detailSS, char *finalSS,
1874  int seqlen)
1875  ----------------------------------------------------------------------
1876 *//**
1877  \param[in] **ssTable The sec struc table [struc symbol][res]
1878  \param[out] *detailSS detailed ss assignment array
1879  \param[out] *finalSS final ss assignment array
1880  \param[in] seqlen the sequence length
1881 
1882  Create the secodary structure summary from the secondary structure
1883  bonding table using Kabsch/Sander priority rules
1884 
1885 - 19.05.99 Original By: ACRM
1886 - 13.07.15 Modified for BiopLib
1887 */
1888 static void MakeSummary(char **ssTable,
1889  char *detailSS,
1890  char *finalSS,
1891  int seqlen)
1892 {
1893  static char ssSymbols[] = "H EB GITS+";
1894  static char altSSSymbols[] = "h eb gits+";
1895 
1896  int resCount;
1897 
1898  for(resCount = 0; resCount < seqlen; resCount++)
1899  {
1900  detailSS[resCount] = SECSTR_COIL;
1901  finalSS[resCount] = SECSTR_COIL;
1902  }
1903 
1904  /* Alpha helices */
1905  MarkHelices(ssTable, detailSS, finalSS, SECSTR_IDX_ALPHAH,
1906  HELIX_CHUNK_SIZE, ssSymbols[SECSTR_IDX_ALPHAH],
1907  altSSSymbols[SECSTR_IDX_ALPHAH], seqlen);
1908 
1909  /* Sheets and bridges */
1910  MarkSheetsAndBridges(seqlen, ssTable, detailSS, finalSS, ssSymbols,
1911  altSSSymbols);
1912 
1913  /* 3_10 helices */
1914  MarkHelices(ssTable, detailSS, finalSS, SECSTR_IDX_THRTNH,
1916  altSSSymbols[SECSTR_IDX_THRTNH], seqlen);
1917 
1918  /* Pi helices */
1919  MarkHelices(ssTable, detailSS, finalSS, SECSTR_IDX_PIH, PIH_CHUNK_SIZE,
1920  ssSymbols[SECSTR_IDX_PIH], altSSSymbols[SECSTR_IDX_PIH],
1921  seqlen);
1922 
1923  /* Move to summary information */
1924  MarkHelicesInSummary(detailSS, ssSymbols[SECSTR_IDX_THRTNH],
1925  altSSSymbols[SECSTR_IDX_THRTNH],
1926  ssSymbols[SECSTR_IDX_TURN],
1927  altSSSymbols[SECSTR_IDX_TURN],
1928  THREE_TEN_CHUNK_SIZE, seqlen);
1929  MarkHelicesInSummary(finalSS, ssSymbols[SECSTR_IDX_THRTNH],
1930  altSSSymbols[SECSTR_IDX_THRTNH],
1931  ssSymbols[SECSTR_IDX_TURN],
1932  altSSSymbols[SECSTR_IDX_TURN],
1933  THREE_TEN_CHUNK_SIZE, seqlen);
1934  MarkHelicesInSummary(detailSS, ssSymbols[SECSTR_IDX_PIH],
1935  altSSSymbols[SECSTR_IDX_PIH],
1936  ssSymbols[SECSTR_IDX_TURN],
1937  altSSSymbols[SECSTR_IDX_TURN], PIH_CHUNK_SIZE,
1938  seqlen);
1939  MarkHelicesInSummary(finalSS, ssSymbols[SECSTR_IDX_PIH],
1940  altSSSymbols[SECSTR_IDX_PIH],
1941  ssSymbols[SECSTR_IDX_TURN],
1942  altSSSymbols[SECSTR_IDX_TURN], PIH_CHUNK_SIZE,
1943  seqlen);
1944 
1945  /* Turns */
1946  MarkTurns(ssTable, detailSS, finalSS, ssSymbols[SECSTR_IDX_TURN],
1947  altSSSymbols[SECSTR_IDX_TURN], seqlen);
1948 
1949  /* Bends */
1950  MarkBends(ssTable, detailSS, finalSS, seqlen);
1951 }
1952 
1953 
1954 /************************************************************************/
1955 /*>static BOOL MakeTurnsAndBridges(int **hbond, char **ssTable,
1956  REAL **mcAngles, int **bridgePoints,
1957  int *chainEnd, int seqlen,BOOL verbose)
1958  -----------------------------------------------------------------------
1959 *//**
1960  \param[in] **hbond HBond [resnum][hbondIndex]
1961  Array of the HBonds we've found
1962  \param[out] **ssTable The sec struc table [struc symbol][res]
1963  \param[in] **mcAngles Mainchain torsion angles
1964  \param[out] **bridgePoints Bridges between strands
1965  \param[in] *chainEnd Array indexed by chain number indicating
1966  the end of each chain
1967  \param[in] seqlen The sequence length
1968  \param[in] verbose Print messages
1969  \return Memory allocation OK
1970 
1971  Identify turns and bridges
1972 
1973  TODO: This code needs better commenting and breaking down into smaller
1974  subroutines
1975 
1976 - 19.05.99 Original By: ACRM
1977 - 27.05.99 Standard format for messages
1978 - 16.06.99 Initialise lastStrand to 0
1979 - 13.07.15 Modified for BiopLib
1980 - 09.08.16 Zero-basing
1981 */
1982 static BOOL MakeTurnsAndBridges(int **hbond, char **ssTable,
1983  REAL **mcAngles, int **bridgePoints,
1984  int *chainEnd, int seqlen,
1985  BOOL verbose)
1986 {
1987  static int turnSize[NUM_TURN_TYPE] = {3, 4, 5};
1988  static char turnChar[NUM_TURN_TYPE] = {'3', '4', '5'};
1989  static int turnIndex[NUM_TURN_TYPE] = {SECSTR_IDX_THRTNH,
1991  SECSTR_IDX_PIH};
1992  static int baseOfSt[NRULES][4] =
1993  {{-1, 0, 0, 1},
1994  { 0, 0, 0, 0},
1995  {-1, -1, -2, 1}
1996  };
1997  static int baseOffsets[NRULES][2] =
1998  {{2, -1},
1999  {1, 0},
2000  {2, -1}
2001  };
2002  static int bridgeOffsets[NRULES] =
2003  {1, -1, -1};
2004 
2005  static char upperCaseLetters[] =
2006  " ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2007  static char lowerCaseLetters[] =
2008  " abcdefghijklmnopqrstuvwxyz";
2009  static char nstchp[] =
2010  " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890";
2011 
2012  static int bridgeRules[2][NRULES] =
2013  {{-1, 1, -1},
2014  { 1, 1, -1}
2015  };
2016 
2017  int resCount,
2018  bondCount,
2019  turnCount,
2020  i,
2021  ruleCount,
2022  bondPointer,
2023  bridgeIndex,
2024  bridgePointer,
2025  theBridge,
2026  bridgeDirection,
2027  theNewBridge,
2028  resCountInSS,
2029  bridgePoint,
2030  newBridgePoint,
2031  strandNumber,
2032  startOfSheet,
2033  endOfSheet,
2034  strandCount,
2035  charCode,
2036  strandCharOffset,
2037  thisStrandOffset,
2038  nextStrandOffset,
2039  bulgeCount,
2040  bridgeCount,
2041  strandOffset,
2042  theBridgePoint,
2043  bridgeArrayIndex,
2044  bestValue,
2045  firstChain,
2046  seqlenMalloc,
2047  newBridgeIndex = 0,
2048  lastStrand = 0,
2049  **bridge = NULL,
2050  **strandCode = NULL,
2051  *sheetCode = NULL;
2052  char ch,
2053  strandChar,
2054  bridgeChar;
2055 
2056  /* Allocate memory for arrays */
2057  seqlenMalloc = seqlen;
2058 
2059  bridge = (int **)blArray2D(sizeof(int), NUM_BRIDGE, seqlenMalloc);
2060  strandCode = (int **)blArray2D(sizeof(int), NUM_STRAND_PAIR,
2061  seqlenMalloc);
2062  sheetCode = (int *)malloc(sizeof(int) * seqlenMalloc);
2063 
2064  if((bridge == NULL) || (strandCode == NULL) || (sheetCode == NULL))
2065  {
2066  if(bridge != NULL)
2067  blFreeArray2D((char **)bridge, NUM_BRIDGE, seqlenMalloc);
2068  if(strandCode != NULL)
2069  blFreeArray2D((char **)strandCode, NUM_STRAND_PAIR,
2070  seqlenMalloc);
2071  if(sheetCode != NULL)
2072  free(sheetCode);
2073 
2074  return(FALSE);
2075  }
2076 
2077  /* Initialize everything as being coil */
2078  for(resCount = 0; resCount < seqlen; resCount++)
2079  {
2080  for(i = 0; i<NUM_STRUC_SYMS; i++)
2081  {
2082  ssTable[i][resCount] = SECSTR_COIL;
2083  }
2084  }
2085 
2086  firstChain = 1;
2087 
2088  for(resCount=0; resCount<seqlen; resCount++)
2089  {
2090  if(resCount >= chainEnd[firstChain])
2091  firstChain++;
2092 
2093  for(bondCount=0; bondCount<2; bondCount++)
2094  {
2095  for(turnCount=0; turnCount<NUM_TURN_TYPE; turnCount++)
2096  {
2097  if(hbond[resCount][bondCount] != 0)
2098  {
2099  if((hbond[resCount][bondCount]-resCount-1 ==
2100  turnSize[turnCount]) &&
2101  (hbond[resCount][bondCount] <= chainEnd[firstChain]))
2102  {
2103  if(ssTable[turnIndex[turnCount]][resCount] ==
2105  {
2106  ssTable[turnIndex[turnCount]][resCount] =
2108  }
2109  else
2110  {
2111  ssTable[turnIndex[turnCount]][resCount] =
2113  }
2114  ssTable[turnIndex[turnCount]]
2115  [hbond[resCount][bondCount]-1] = SECSTR_BEND_END;
2116 
2117  for(i = resCount + 1;
2118  i < resCount + turnSize[turnCount];
2119  i++)
2120  {
2121  if(ssTable[turnIndex[turnCount]][i] == SECSTR_COIL)
2122  {
2123  ssTable[turnIndex[turnCount]][i] =
2124  turnChar[turnCount];
2125  }
2126 
2127  }
2128  for(i = resCount;
2129  i<= resCount + turnSize[turnCount];
2130  i++)
2131  {
2132  ssTable[SECSTR_IDX_TURN][i] = SECSTR_TURN;
2133  }
2134  }
2135  }
2136  }
2137  }
2138  }
2139 
2140  /* Initialize bridge counts to zero */
2141  for(resCount = 0; resCount < seqlen; resCount++)
2142  {
2143  for(bondCount = 0; bondCount < 4; bondCount++)
2144  {
2145  bridge[bondCount][resCount] = 0;
2146  }
2147  }
2148 
2149  for(ruleCount = 0; ruleCount<NRULES; ruleCount++)
2150  {
2151  for(resCount = baseOffsets[ruleCount][0];
2152  resCount <= (seqlen + baseOffsets[ruleCount][1]);
2153  resCount++)
2154  {
2155  int bond_i;
2156 
2157  for(bond_i = 0; bond_i < 2; bond_i++)
2158  {
2159  bondPointer =
2160  hbond[resCount+baseOfSt[ruleCount][0]-1][bond_i];
2161 
2162  if((ruleCount <= PARLEL) || (bondPointer > resCount))
2163  {
2164  bridgePointer = bondPointer + baseOfSt[ruleCount][1];
2165 
2166  if((bondPointer > -baseOfSt[ruleCount][2]) &&
2167  (abs(resCount - bridgePointer) > BRIDGE_SEP))
2168  {
2169  int bond_j;
2170 
2171  for(bond_j = 0; bond_j < 2; bond_j++)
2172  {
2173  if(hbond[bondPointer+baseOfSt[ruleCount][2]-1]
2174  [bond_j] ==
2175  resCount+baseOfSt[ruleCount][3])
2176  {
2177  int altBridgeIndex;
2178 
2179  bridgeIndex = 0;
2180 
2181  if(bridge[bridgeIndex][resCount-1] != 0)
2182  bridgeIndex = 1;
2183 
2184  bridge[bridgeIndex][resCount-1] =
2185  bridgeOffsets[ruleCount] * bridgePointer;
2186 
2187  bridge[bridgeIndex+2][resCount-1] =
2188  bridgeRules[0][ruleCount];
2189 
2190  altBridgeIndex = 1;
2191 
2192  if(bridge[altBridgeIndex-1][bridgePointer-1] !=0)
2193  altBridgeIndex = 2;
2194 
2195  bridge[altBridgeIndex-1][bridgePointer-1] =
2196  bridgeOffsets[ruleCount] * resCount;
2197 
2198  bridge[altBridgeIndex+2-1][bridgePointer-1] =
2199  bridgeRules[1][ruleCount];
2200  }
2201  }
2202  }
2203  }
2204  }
2205  }
2206  }
2207 
2208  for(resCount = 0; resCount < seqlen; resCount++)
2209  {
2210  if(APPROXNE(mcAngles[DIHED_CHIRAL][resCount], NULLVAL))
2211  {
2212  if(mcAngles[DIHED_CHIRAL][resCount] < 0.0)
2213  {
2214  ssTable[SECSTR_IDX_CHISGN][resCount] = '-';
2215  }
2216  else
2217  {
2218  ssTable[SECSTR_IDX_CHISGN][resCount] = '+';
2219  }
2220  }
2221  }
2222 
2223  for(resCount = 0; resCount < seqlen; resCount++)
2224  {
2225  sheetCode[resCount] = 0;
2226  strandCode[0][resCount] = 0;
2227  strandCode[1][resCount] = 0;
2228  bridgePoints[0][resCount] = 0;
2229  bridgePoints[1][resCount] = 0;
2230  }
2231  strandNumber = 0;
2232 
2233  for(resCount=0; resCount<seqlen; resCount++)
2234  {
2235  for(bridgeIndex=0; bridgeIndex<2; bridgeIndex++)
2236  {
2237  if((bridge[bridgeIndex][resCount] != 0) &&
2238  (abs(bridge[bridgeIndex][resCount]) >= resCount))
2239  {
2240  /* bridge[][] array contains the bridge number +1 */
2241  theBridge = abs(bridge[bridgeIndex][resCount]) - 1;
2242  bridgeDirection = bridge[bridgeIndex][resCount] /
2243  (theBridge+1);
2244  bridgePoint = 1;
2245 
2246  if(abs(bridge[0][theBridge]) == resCount+1)
2247  bridgePoint = 0;
2248 
2249  for(resCountInSS = (resCount + 2);
2250  resCountInSS <= MIN(seqlen,resCount+BULGE_SIZE_L);
2251  resCountInSS++)
2252  {
2253  for(newBridgeIndex = 0;
2254  newBridgeIndex < NUM_STRAND_PAIR;
2255  newBridgeIndex++)
2256  {
2257  if((bridge[newBridgeIndex][resCountInSS-1] *
2258  bridgeDirection) > 0)
2259  {
2260  /* bridge[][] array contains the bridge number +1 */
2261  theNewBridge = abs(bridge[newBridgeIndex]
2262  [resCountInSS-1]) - 1;
2263 
2264  if(abs(theNewBridge-theBridge)!=0)
2265  {
2266  if((theNewBridge-theBridge) /
2267  abs(theNewBridge-theBridge) ==
2268  bridgeDirection)
2269  {
2270  if((resCountInSS-resCount-1 > BULGE_SIZE_S &&
2271  abs(theNewBridge-theBridge) <=
2272  BULGE_SIZE_S) ||
2273  (resCountInSS-resCount-1 <= BULGE_SIZE_S &&
2274  abs(theNewBridge-theBridge) <=
2275  BULGE_SIZE_L))
2276  {
2277  newBridgePoint = 1;
2278  if(abs(bridge[0][theNewBridge]) ==
2279  resCountInSS)
2280  {
2281  newBridgePoint = 0;
2282  }
2283 
2284  for(i = resCount; i< resCountInSS; i++)
2285  {
2286  if(ssTable[SECSTR_IDX_SHEET][i] ==
2287  SECSTR_COIL)
2288  {
2289  ssTable[SECSTR_IDX_SHEET][i] =
2290  SECSTR_SHEET;
2291  }
2292  }
2293 
2294  if(bridge[bridgeIndex+2][resCount] < 0)
2295  {
2296  ssTable[SECSTR_IDX_SHEET][resCount] =
2298  }
2299 
2300  if(bridge[newBridgeIndex+2][resCountInSS-1]
2301  < 0)
2302  {
2303  ssTable[SECSTR_IDX_SHEET][resCountInSS-1] =
2305  }
2306 
2307  for(i = theBridge;
2308  (bridgeDirection < 0) ?
2309  i>=theNewBridge :
2310  i<=theNewBridge;
2311  i+=bridgeDirection)
2312  {
2313  if(ssTable[SECSTR_IDX_SHEET][i] ==
2314  SECSTR_COIL)
2315  {
2316  ssTable[SECSTR_IDX_SHEET][i] =
2317  SECSTR_SHEET;
2318  }
2319  }
2320 
2321  if(bridge[bridgePoint+2][theBridge] < 0)
2322  {
2323  ssTable[SECSTR_IDX_SHEET][theBridge] =
2325  }
2326 
2327  if(bridge[newBridgePoint+2][theNewBridge] <
2328  0)
2329  {
2330  ssTable[SECSTR_IDX_SHEET][theNewBridge] =
2332  }
2333 
2334  if(strandCode[bridgeIndex][resCount] == 0)
2335  {
2336  strandNumber++;
2337  strandCode[bridgeIndex][resCount] =
2338  strandNumber * bridgeDirection;
2339  strandCode[bridgePoint][theBridge] =
2340  strandNumber * bridgeDirection;
2341  }
2342 
2343  strandCode[newBridgeIndex][resCountInSS-1] =
2344  strandCode[bridgeIndex][resCount];
2345  strandCode[newBridgePoint][theNewBridge] =
2346  strandCode[bridgePoint][theBridge];
2347 
2348  /* Force break from both newBridgeIndex and
2349  resCountInSS loops
2350  */
2351  resCountInSS = MAX(seqlen,
2352  resCount+1+BULGE_SIZE_L);
2353  break;
2354  }
2355  }
2356  }
2357  }
2358  }
2359  }
2360 
2361  ch = SECSTR_BRIDGE_FWD;
2362  if(bridgeDirection < 0)
2363  ch = SECSTR_BRIDGE_BACKWD;
2364  ssTable[SECSTR_IDX_BRIDGE][resCount] = ch;
2365  ssTable[SECSTR_IDX_BRIDGE][theBridge] = ch;
2366  }
2367  }
2368  }
2369 
2370 
2371  resCount = 0;
2372 
2373  do{
2374  while(ssTable[SECSTR_IDX_SHEET][resCount] == SECSTR_COIL)
2375  {
2376  resCount++;
2377  if(resCount >= seqlen)
2378  goto RES_COUNT_ERROR;
2379  }
2380 
2381  startOfSheet = resCount;
2382  resCount = startOfSheet + 1;
2383 
2384  while(ssTable[SECSTR_IDX_SHEET][resCount] != SECSTR_COIL)
2385  {
2386  resCount++;
2387  if(resCount >= seqlen)
2388  break;
2389  }
2390  endOfSheet = (resCount - 1);
2391  lastStrand = 0;
2392 
2393  FindNextStrand(strandCode, startOfSheet, endOfSheet, &strandCount,
2394  &newBridgeIndex, lastStrand, &bestValue);
2395 
2396  while(bestValue != 0)
2397  {
2398  lastStrand = fabs(strandCode[newBridgeIndex][strandCount]);
2399  charCode = lastStrand%NUM_STRAND_CHARS;
2400 
2401  if(charCode == 0)
2402  charCode = NUM_STRAND_CHARS;
2403 
2404  if(strandCode[newBridgeIndex][strandCount] < 0)
2405  {
2406  strandChar = upperCaseLetters[charCode];
2407  }
2408  else
2409  {
2410  strandChar = lowerCaseLetters[charCode];
2411  }
2412 
2413  strandCharOffset = SECSTR_IDX_BRIDG1;
2414 
2415  if(ssTable[SECSTR_IDX_BRIDG1][strandCount] != SECSTR_COIL)
2416  strandCharOffset = SECSTR_IDX_BRIDG2;
2417 
2418  if(strandCharOffset != SECSTR_IDX_BRIDG2)
2419  {
2420  thisStrandOffset = strandCount+1;
2421 
2422  do
2423  {
2424  nextStrandOffset =
2425  FindNextCodeOccurrence(thisStrandOffset, endOfSheet,
2426  strandCode[newBridgeIndex]
2427  [strandCount],
2428  strandCode);
2429  if(nextStrandOffset <= 0)
2430  break;
2431 
2432  if(ssTable[SECSTR_IDX_BRIDG1][nextStrandOffset-1] !=
2433  SECSTR_COIL)
2434  {
2435  strandCharOffset = SECSTR_IDX_BRIDG2;
2436  }
2437 
2438  thisStrandOffset = nextStrandOffset;
2439  } while(strandCharOffset != SECSTR_IDX_BRIDG2);
2440  }
2441 
2442  ssTable[strandCharOffset][strandCount] = strandChar;
2443 
2444  bridgePoints[strandCharOffset-SECSTR_IDX_BRIDG1][strandCount] =
2445  fabs(bridge[newBridgeIndex][strandCount]);
2446 
2447  thisStrandOffset = strandCount+1;
2448 
2449  for(;;)
2450  {
2451  nextStrandOffset =
2452  FindNextCodeOccurrence(thisStrandOffset,endOfSheet,
2453  strandCode[newBridgeIndex]
2454  [strandCount],
2455  strandCode);
2456 
2457  if(nextStrandOffset <= 0)
2458  break;
2459 
2460  for(bulgeCount = thisStrandOffset;
2461  bulgeCount < (nextStrandOffset - 1);
2462  bulgeCount++)
2463  {
2464  ssTable[strandCharOffset][bulgeCount] = SECSTR_BULGE;
2465  }
2466 
2467  ssTable[strandCharOffset][nextStrandOffset-1] = strandChar;
2468  strandOffset = 0;
2469 
2470  if(strandCode[0][nextStrandOffset-1] !=
2471  strandCode[newBridgeIndex][strandCount])
2472  {
2473  strandOffset = 1;
2474  }
2475 
2476  bridgePoints[strandCharOffset-SECSTR_IDX_BRIDG1]
2477  [nextStrandOffset-1] =
2478  fabs(bridge[strandOffset][nextStrandOffset-1]);
2479 
2480  thisStrandOffset = nextStrandOffset;
2481  }
2482 
2483  FindNextStrand(strandCode, startOfSheet, endOfSheet,
2484  &strandCount, &newBridgeIndex, lastStrand,
2485  &bestValue);
2486  }
2487  resCount = endOfSheet + 1;
2488  } while (resCount < seqlen);
2489 
2490 
2491 RES_COUNT_ERROR:
2492  if(verbose && (lastStrand > NUM_STRAND_CHARS))
2493  {
2494  fprintf(stderr,"Sec Struc: (warning) There are %3d strands. Labels \
2495 have restarted\n",
2496  lastStrand);
2497  }
2498 
2499  LabelSheets(ssTable, bridgePoints, sheetCode, seqlen, verbose);
2500 
2501  for(resCount=0; resCount<seqlen; resCount++)
2502  {
2503  if(sheetCode[resCount] != 0)
2504  {
2505  charCode = sheetCode[resCount];
2506  ssTable[SECSTR_IDX_SHEET1][resCount] = nstchp[charCode];
2507  }
2508  }
2509 
2510  bridgeCount = 0;
2511 
2512  for(resCount=0; resCount<seqlen; resCount++)
2513  {
2514  for(newBridgeIndex=0;
2515  newBridgeIndex<NUM_STRAND_PAIR;
2516  newBridgeIndex++)
2517  {
2518  if((bridge[newBridgeIndex][resCount] != 0) &&
2519  (strandCode[newBridgeIndex][resCount] == 0) &&
2520  (fabs(bridge[newBridgeIndex][resCount]) >= resCount))
2521  {
2522  bridgeCount++;
2523  theBridgePoint = fabs(bridge[newBridgeIndex][resCount]);
2524  charCode = bridgeCount%NUM_STRAND_CHARS;
2525 
2526  if(charCode == 0)
2527  charCode = NUM_STRAND_CHARS;
2528 
2529  if(bridge[newBridgeIndex][resCount] < 0)
2530  {
2531  bridgeChar = upperCaseLetters[charCode];
2532  }
2533  else
2534  {
2535  bridgeChar = lowerCaseLetters[charCode];
2536  }
2537 
2538  bridgeArrayIndex = SECSTR_IDX_BRIDG1;
2539 
2540  if(ssTable[SECSTR_IDX_BRIDG1][resCount] != SECSTR_COIL)
2541  bridgeArrayIndex = SECSTR_IDX_BRIDG2;
2542 
2543  ssTable[bridgeArrayIndex][resCount] = bridgeChar;
2544 
2545  bridgePoints[bridgeArrayIndex-SECSTR_IDX_BRIDG1][resCount] =
2546  theBridgePoint;
2547 
2548  bridgeArrayIndex = SECSTR_IDX_BRIDG1;
2549 
2550  if(ssTable[SECSTR_IDX_BRIDG1][theBridgePoint-1] !=
2551  SECSTR_COIL)
2552  bridgeArrayIndex = SECSTR_IDX_BRIDG2;
2553 
2554  ssTable[bridgeArrayIndex][theBridgePoint-1] = bridgeChar;
2555 
2556  bridgePoints[bridgeArrayIndex-SECSTR_IDX_BRIDG1]
2557  [theBridgePoint-1] = resCount+1;
2558  }
2559  }
2560  }
2561 
2562  if(verbose && (bridgeCount > NUM_STRAND_CHARS))
2563  {
2564  fprintf(stderr,"Sec Struc: (warning) There are %3d bridges. Labels \
2565 have restarted\n",
2566  bridgeCount);
2567  }
2568 
2569  if(bridge != NULL)
2570  blFreeArray2D((char **)bridge, NUM_BRIDGE, seqlenMalloc);
2571  if(strandCode != NULL)
2572  blFreeArray2D((char **)strandCode, NUM_STRAND_PAIR, seqlenMalloc);
2573  if(sheetCode != NULL)
2574  free(sheetCode);
2575 
2576  return(TRUE);
2577 }
2578 
2579 
2580 /************************************************************************/
2581 /*>static void RejectHBond(int resIndex, int *bond, REAL *bonde)
2582  -------------------------------------------------------------
2583 *//**
2584  \param[in] resIndex Current position in the list of residues
2585  \param[in,out] *bond Bond array
2586  \param[in,out] *bonde Bond energy array
2587 
2588  Reject a hydrogen bond because others have better energy
2589 
2590 - 19.05.99 Original By: ACRM
2591 - 13.07.15 Modified for BiopLib
2592 */
2593 static void RejectHBond(int resIndex, int *bond, REAL *bonde)
2594 {
2595  int i=1;
2596 
2597  if(bond[0] != resIndex)
2598  i++;
2599 
2600  if(i==1)
2601  {
2602  bond[0] = bond[1];
2603  bonde[0] = bonde[1];
2604  }
2605 
2606  bond[1] = 0;
2607  bonde[1] = 0.0;
2608 }
2609 
2610 
2611 /************************************************************************/
2612 /*>static void SetSheet(int bridgePoint, int sheetNum, int *sheetCode,
2613  char **ssTable, int seqlen)
2614  -------------------------------------------------------------------
2615 *//**
2616  \param[in] bridgePoint A bridge point residue
2617  \param[in] sheetNum The sheet number
2618  \param[out] *sheetCode The sheet number for each residue
2619  \param[in] **ssTable The sec struc table [struc symbol][res]
2620  \param[in] seqlen The sequence length
2621 
2622  Find the bounds of the parent sheet containing bridgePoint and flag
2623  this as partially set as sheet
2624 
2625 - 19.05.99 Original By: ACRM
2626 - 13.07.15 Modified for BiopLib
2627 */
2628 static void SetSheet(int bridgePoint,
2629  int sheetNum,
2630  int *sheetCode,
2631  char **ssTable,
2632  int seqlen)
2633 {
2634  int resCount,
2635  sheetStart,
2636  sheetEnd;
2637 
2638 
2639  resCount = bridgePoint-1;
2640 
2641  while(ssTable[SECSTR_IDX_SHEET][resCount] != SECSTR_COIL)
2642  {
2643  resCount++;
2644  if(resCount >= seqlen)
2645  break;
2646  }
2647 
2648  sheetEnd = resCount - 1;
2649  resCount = bridgePoint-1;
2650 
2651  while(ssTable[SECSTR_IDX_SHEET][resCount] != SECSTR_COIL)
2652  {
2653  resCount--;
2654  if(resCount < 1)
2655  break;
2656  }
2657 
2658  sheetStart = resCount + 1;
2659 
2660  for(resCount = sheetStart; resCount <= sheetEnd; resCount++)
2661  {
2662  sheetCode[resCount] = (-sheetNum);
2663  }
2664 }
2665 
2666 
2667 /************************************************************************/
2668 /*>static void LabelSheets(char **ssTable, int **bridgePoints,
2669  int *sheetCode, int seqlen, BOOL verbose)
2670  -----------------------------------------------------------------
2671 *//**
2672  \param[in] **ssTable The sec struc table [struc symbol][res]
2673  \param[in] **bridgePoints Bridges between strands
2674  \param[out] *sheetCode The sheet number for each residue
2675  \param[in] seqlen The sequence length
2676  \param[in] verbose Print messages
2677 
2678  Label the sheets. At this stage we use numeric labels. These are turned
2679  into alphabetic symbols later
2680 
2681 - 19.05.99 Original By: ACRM
2682 - 27.05.99 Standard format for messages
2683 - 13.07.15 Modified for BiopLib
2684 */
2685 static void LabelSheets(char **ssTable, int **bridgePoints,
2686  int *sheetCode, int seqlen, BOOL verbose)
2687 {
2688  int resCount,
2689  sheetStart,
2690  sheetEnd,
2691  sheetNum,
2692  resCountInSheet,
2693  bridgeIndex;
2694  BOOL found;
2695 
2696  for(resCount=0; resCount<seqlen; resCount++)
2697  {
2698  sheetCode[resCount] = 0;
2699  }
2700 
2701  sheetNum = 0;
2702  resCount = 1;
2703 
2704  for(;;)
2705  {
2706  FindFirstUnsetSheet(resCount, ssTable, sheetCode, &sheetStart,
2707  &sheetEnd, seqlen);
2708  if(sheetStart != 0)
2709  {
2710  sheetNum++;
2711  for(resCountInSheet = sheetStart;
2712  resCountInSheet <= sheetEnd;
2713  resCountInSheet++)
2714  {
2715  sheetCode[resCountInSheet-1] = sheetNum;
2716 
2717  for(bridgeIndex = 0;
2718  bridgeIndex < NUM_BRIDGE_PAIR;
2719  bridgeIndex++)
2720  {
2721  if(bridgePoints[bridgeIndex][resCountInSheet-1] != 0)
2722  {
2723  if(sheetCode[bridgePoints[bridgeIndex]
2724  [resCountInSheet-1]-1] == 0)
2725  {
2726  SetSheet(bridgePoints[bridgeIndex][resCountInSheet-1],
2727  sheetNum, sheetCode, ssTable, seqlen);
2728  }
2729  }
2730  }
2731  }
2732 
2733  do
2734  {
2735  FindNextPartialSheet(sheetEnd+1, sheetNum, ssTable,
2736  sheetCode, bridgePoints, &found, seqlen);
2737  } while(found);
2738 
2739  resCount = sheetEnd + 1;
2740 
2741  if(resCount > seqlen)
2742  break;
2743  }
2744  else
2745  {
2746  break;
2747  }
2748  }
2749 
2750  if(verbose && (sheetNum > NUM_STRAND_CHARS))
2751  {
2752  fprintf(stderr,"Sec Struc: (warning) There are %3d sheets. Labels \
2753 have restarted\n",
2754  sheetNum);
2755  }
2756 }
2757 
2758 
2759 /************************************************************************/
2760 /*>static int FindNextCodeOccurrence(int strandStart, int strandEnd,
2761  int code, int **strandCode)
2762  -----------------------------------------------------------------
2763 *//**
2764  \param[in] strandStart Start of a strand
2765  \param[in] strandEnd End of a strand
2766  \param[in] code Current strand code
2767  \param[in] **strandCode Array of strand codes[bridge][strand]
2768  \return Offset into strandCode array
2769 
2770  Search for the next occurrence of 'code' in the strandCode array
2771 
2772 - 19.05.99 Original By: ACRM
2773 - 13.07.15 Modified for BiopLib
2774 */
2775 static int FindNextCodeOccurrence(int strandStart, int strandEnd,
2776  int code, int **strandCode)
2777 {
2778  int nxpl;
2779 
2780  for(nxpl=strandStart; nxpl<strandEnd; nxpl++)
2781  {
2782  if((strandCode[0][nxpl] == code) ||
2783  (strandCode[1][nxpl] == code))
2784  {
2785  return(nxpl+1);
2786  }
2787  }
2788 
2789  return(0);
2790 }
2791 
2792 /************************************************************************/
2793 /*>static void MarkTurns(char **ssTable, char *detailSS, char *finalSS,
2794  char turnChar, char altTurnChar, int seqlen)
2795  --------------------------------------------------------------------
2796 *//**
2797  \param[in] **ssTable The sec struc table [struc symbol][res]
2798  \param[out] *detailSS detailed ss assignment array
2799  \param[out] *finalSS final ss assignment array
2800  \param[in] turnChar Character for a turn (T)
2801  \param[in] altTurnChar Alternate character for a turn (t)
2802  \param[in] seqlen The sequence length
2803 
2804  Search for isolated turn type bonds. If found set them to the 'turn'
2805  symbol
2806 
2807 - 19.05.99 Original By: ACRM
2808 - 13.07.15 Modified for BiopLib
2809 */
2810 static void MarkTurns(char **ssTable, char *detailSS, char *finalSS,
2811  char turnChar, char altTurnChar, int seqlen)
2812 {
2813  int resCount,
2814  symbolCount,
2815  helixCount,
2816  helixPri[NUM_HELIX_TYPE],
2817  helixSize[NUM_HELIX_TYPE];
2818 
2819  helixPri[0] = SECSTR_IDX_ALPHAH;
2820  helixPri[1] = SECSTR_IDX_THRTNH;
2821  helixPri[2] = SECSTR_IDX_PIH;
2822  helixSize[0] = HELIX_CHUNK_SIZE;
2823  helixSize[1] = THREE_TEN_CHUNK_SIZE;
2824  helixSize[2] = PIH_CHUNK_SIZE;
2825 
2826  for(helixCount = 0; helixCount < NUM_HELIX_TYPE; helixCount++)
2827  {
2828  if(ssTable[helixPri[helixCount]][0] == SECSTR_BEND_START &&
2829  ssTable[helixPri[helixCount]][1] != SECSTR_BEND_START)
2830  {
2831  for(symbolCount = 1;
2832  symbolCount < helixSize[helixCount];
2833  symbolCount++)
2834  {
2835  if(detailSS[symbolCount] == SECSTR_COIL)
2836  {
2837  detailSS[symbolCount] = turnChar;
2838  }
2839 
2840  if(finalSS[symbolCount] == SECSTR_COIL ||
2841  finalSS[symbolCount] == altTurnChar)
2842  {
2843  finalSS[symbolCount] = turnChar;
2844  }
2845  }
2846 
2847  if(finalSS[0] == SECSTR_COIL)
2848  finalSS[0] = altTurnChar;
2849 
2850  if(finalSS[helixSize[helixCount]] == SECSTR_COIL)
2851  finalSS[helixSize[helixCount]] = altTurnChar;
2852  }
2853 
2854  for(resCount = 1;
2855  resCount < seqlen - helixSize[helixCount];
2856  resCount++)
2857  {
2858  if((ssTable[helixPri[helixCount]][resCount] ==
2860  ssTable[helixPri[helixCount]][resCount] ==
2861  SECSTR_BEND_BOTH) &&
2862  (ssTable[helixPri[helixCount]][resCount-1] !=
2864  ssTable[helixPri[helixCount]][resCount-1] !=
2865  SECSTR_BEND_BOTH) &&
2866  (ssTable[helixPri[helixCount]][resCount+1] !=
2868  ssTable[helixPri[helixCount]][resCount+1] !=
2869  SECSTR_BEND_BOTH))
2870  {
2871  for(symbolCount = resCount + 1;
2872  symbolCount < (resCount + helixSize[helixCount]);
2873  symbolCount++)
2874  {
2875  if(detailSS[symbolCount] == SECSTR_COIL)
2876  {
2877  detailSS[symbolCount] = turnChar;
2878  }
2879 
2880  if(finalSS[symbolCount] == SECSTR_COIL ||
2881  finalSS[symbolCount] == altTurnChar)
2882  {
2883  finalSS[symbolCount] = turnChar;
2884  }
2885  }
2886 
2887  if(finalSS[resCount] == SECSTR_COIL)
2888  finalSS[resCount] = altTurnChar;
2889 
2890  if(finalSS[resCount+helixSize[helixCount]] == SECSTR_COIL)
2891  finalSS[resCount+helixSize[helixCount]] = altTurnChar;
2892  }
2893  }
2894  }
2895 }
2896 
2897 
2898 /************************************************************************/
2899 /*>static void MarkSheetsAndBridges(int seqlen, char **ssTable,
2900  char *detailSS, char *finalSS,
2901  char *ssSymbols, char *altSSSymbols)
2902  ---------------------------------------------------------------------
2903 *//**
2904  \param seqlen The sequence length
2905  \param **ssTable The sec struc table [struc symbol][res]
2906  \param *detailSS detailed ss assignment array
2907  \param *finalSS final ss assignment array
2908  \param *ssSymbols array of symbols for secondary structures
2909  \param *altSSSymbols array of alternative symbols for sec strucs
2910 
2911  Put in symbols for the secondary structure elements for sheets and
2912  bridges.
2913 
2914 - 19.05.99 Original By: ACRM
2915 - 13.07.15 Modified for BiopLib
2916 */
2917 static void MarkSheetsAndBridges(int seqlen, char **ssTable,
2918  char *detailSS, char *finalSS,
2919  char *ssSymbols, char *altSSSymbols)
2920 {
2921  int resCount;
2922 
2923  for(resCount = 0; resCount < seqlen; resCount++)
2924  {
2925  if(ssTable[SECSTR_IDX_SHEET][resCount] != SECSTR_COIL)
2926  {
2927  if(detailSS[resCount] == SECSTR_COIL)
2928  detailSS[resCount] = ssSymbols[SECSTR_IDX_SHEET];
2929 
2930  if(finalSS[resCount] == SECSTR_COIL ||
2931  finalSS[resCount] == altSSSymbols[SECSTR_IDX_ALPHAH] ||
2932  finalSS[resCount] == altSSSymbols[SECSTR_IDX_SHEET])
2933  {
2934  finalSS[resCount] = ssSymbols[SECSTR_IDX_SHEET];
2935  }
2936 
2937  if((ssTable[SECSTR_IDX_SHEET][resCount] == SECSTR_SHEET_SMALL) &&
2938  (resCount > 0))
2939  {
2940  if(finalSS[resCount-1] == SECSTR_COIL ||
2941  finalSS[resCount-1] == ssSymbols[SECSTR_IDX_BRIDGE])
2942  {
2943  finalSS[resCount-1] = altSSSymbols[SECSTR_IDX_SHEET];
2944  }
2945 
2946  if(resCount < seqlen-1)
2947  {
2948  if(finalSS[resCount+1] == SECSTR_COIL ||
2949  finalSS[resCount-1] == ssSymbols[SECSTR_IDX_BRIDGE])
2950  {
2951  finalSS[resCount+1] = altSSSymbols[SECSTR_IDX_SHEET];
2952  }
2953  }
2954  }
2955  }
2956 
2957  if(ssTable[SECSTR_IDX_BRIDGE][resCount] != SECSTR_COIL)
2958  {
2959  if(detailSS[resCount] == SECSTR_COIL)
2960  detailSS[resCount] = ssSymbols[SECSTR_IDX_BRIDGE];
2961 
2962  if(finalSS[resCount] == SECSTR_COIL)
2963  finalSS[resCount] = ssSymbols[SECSTR_IDX_BRIDGE];
2964  }
2965  }
2966 }
2967 
2968 
2969 /************************************************************************/
2970 /*>static void FindNextStrand(int **strandCode, int sheetStart,
2971  int sheetEnd, int *strandCount,
2972  int *startIndex, int lastStrand,
2973  int *bestValue)
2974  ------------------------------------------------------------
2975 *//**
2976  \param[in] **strandCode Array of strand codes[bridge][strand]
2977  \param[in] sheetStart Start of a sheet
2978  \param[in] sheetEnd End of a sheet
2979  \param[out] *strandCount The strand count
2980  \param[out] *startIndex The new bridge
2981  \param[in] lastStrand The last strand
2982  \param[out] *bestValue Best strand
2983 
2984  Find where the next beta strand starts
2985 
2986 - 19.05.99 Original By: ACRM
2987 - 13.07.15 Modified for BiopLib
2988 */
2989 static void FindNextStrand(int **strandCode, int sheetStart, int sheetEnd,
2990  int *strandCount, int *startIndex,
2991  int lastStrand, int *bestValue)
2992 {
2993  int nxstr, nxpl;
2994 
2995  *bestValue = 0;
2996 
2997  for(nxstr=sheetStart; nxstr<=sheetEnd; nxstr++)
2998  {
2999  for(nxpl=0; nxpl<NUM_STRAND_PAIR; nxpl++)
3000  {
3001  if(abs(strandCode[nxpl][nxstr]) > lastStrand)
3002  {
3003  if(*bestValue == 0)
3004  {
3005  *bestValue = abs(strandCode[nxpl][nxstr]);
3006  *startIndex = nxpl;
3007  *strandCount = nxstr;
3008  }
3009  else
3010  {
3011  if(abs(strandCode[nxpl][nxstr]) < *bestValue)
3012  {
3013  *bestValue = abs(strandCode[nxpl][nxstr]);
3014  *startIndex = nxpl;
3015  *strandCount = nxstr;
3016  }
3017  }
3018  }
3019  }
3020  }
3021 }
3022 
3023 
3024 
#define MAX(a, b)
Definition: macros.h:239
#define SECSTR_IDX_BRIDGE
Definition: secstr.c:135
#define SECSTR_SHEET
Definition: secstr.h:71
#define SECSTR_SHEET_SMALL
Definition: secstr.h:72
#define SECSTR_BREAK
Definition: secstr.h:74
Include file for PDB routines.
#define DIHED_PSI
Definition: secstr.c:124
#define SECSTR_BEND_BOTH
Definition: secstr.h:66
int resnum
Definition: pdb.h:310
#define ATOM_CB
Definition: secstr.c:119
#define ANINT(x)
Definition: secstr.c:187
#define MAX_NUM_CHN
Definition: secstr.c:92
short BOOL
Definition: SysDefs.h:64
#define NULL
Definition: array2.c:99
#define SECSTR_TURN
Definition: secstr.h:73
#define CST
Definition: secstr.c:153
Definition: pdb.h:298
#define ATOM_N
Definition: secstr.c:114
#define SECSTR_BRIDGE_BACKWD
Definition: secstr.h:70
#define MAX_PEPTIDE_BOND
Definition: secstr.c:161
#define ATOM_CA
Definition: secstr.c:115
Header for secondary structure calculation.
#define ATOM_C
Definition: secstr.c:116
#define MAXBUFF
Secondary structure calculation.
Definition: secstr.c:87
char ** blArray2D(int size, int dim1, int dim2)
Definition: array2.c:130
#define MAX_NUM_HBOND
Definition: secstr.c:91
#define ATOM_H
Definition: secstr.c:118
#define HBOND_ENERGY_LIMIT
Definition: secstr.c:169
#define SECSTR_IDX_BRIDG2
Definition: secstr.c:137
#define NUM_HELIX_TYPE
Definition: secstr.c:144
#define FALSE
Definition: macros.h:223
#define NEXT(x)
Definition: macros.h:249
#define COORD_DIM
Definition: secstr.c:88
#define FREE_SECSTR_MEMORY
Definition: secstr.c:190
#define APPROXNE(x, y)
Definition: secstr.c:178
#define SECSTR_IDX_SHEET1
Definition: secstr.c:133
#define SECSTR_COIL
Definition: secstr.h:62
#define DIHED_TCO
Definition: secstr.c:129
#define DIHED_CHIRAL
Definition: secstr.c:126
#define BRIDGE_SEP
Definition: secstr.c:149
#define NUM_STRUC_SYMS
Definition: secstr.c:131
Useful macros.
#define NUM_DIHED_DATA
Definition: secstr.c:108
#define NUM_BRIDGE
Definition: secstr.c:104
Include file for 2D/3D array functions.
#define NUM_TURN_TYPE
Definition: secstr.c:156
#define MIN(a, b)
Definition: macros.h:240
char atnam[8]
Definition: pdb.h:316
#define SECSTR_IDX_THRTNH
Definition: secstr.c:138
#define SECSTR_IDX_TURN
Definition: secstr.c:140
#define SECSTR_BULGE
Definition: secstr.h:75
#define ATOM_O
Definition: secstr.c:117
char *** blArray3D(int size, int dim1, int dim2, int dim3)
Definition: array3.c:127
char resnam[8]
Definition: pdb.h:319
#define ATDIST(a, b)
Definition: secstr.c:181
double REAL
Definition: MathType.h:67
REAL blPhi(REAL xi, REAL yi, REAL zi, REAL xj, REAL yj, REAL zj, REAL xk, REAL yk, REAL zk, REAL xl, REAL yl, REAL zl)
Definition: phi.c:107
#define DIHED_OMEGA
Definition: secstr.c:125
REAL z
Definition: pdb.h:300
#define RADIAN
Definition: secstr.c:158
#define SECSTR_BRIDGE_FWD
Definition: secstr.h:69
#define HELIX_CHUNK_SIZE
Definition: secstr.c:145
void blFreeArray2D(char **array, int dim1, int dim2)
Definition: array2.c:174
#define MAX_HBOND_ENERGY
Definition: secstr.c:168
#define THREE_TEN_CHUNK_SIZE
Definition: secstr.c:146
#define NULLVAL
Definition: secstr.c:159
#define NUM_STANDARD_DIHEDRALS
Definition: secstr.c:122
#define INSERTMATCH(ins1, ins2)
Definition: pdb.h:496
#define HBOND_Q1
Definition: secstr.c:171
#define HBOND_F
Definition: secstr.c:173
Include file for angle functions.
#define APPROXEQ(x, y)
Definition: secstr.c:177
char secstr
Definition: pdb.h:325
#define HBOND_MAX_CA_DIST
Definition: secstr.c:170
#define SECSTR_IDX_PIH
Definition: secstr.c:139
#define TRUE
Definition: macros.h:219
#define DIHED_IMPLD
Definition: secstr.c:127
#define HBOND_Q2
Definition: secstr.c:172
#define SECSTR_IDX_SHEET
Definition: secstr.c:134
#define SECSTR_IDX_ALPHAH
Definition: secstr.c:132
#define SECSTR_BEND_START
Definition: secstr.h:64
#define SECSTR_IDX_BRIDG1
Definition: secstr.c:136
#define NUM_BRIDGE_PAIR
Definition: secstr.c:106
#define SECSTR_BEND_END
Definition: secstr.h:65
#define RESTYPE_PROLINE
Definition: secstr.c:109
System-type variable type definitions.
#define NUM_STRAND_PAIR
Definition: secstr.c:105
PDB * blFindNextResidue(PDB *pdb)
#define NST
Definition: secstr.c:154
#define NUM_MC_ATOM_TYPES
Definition: secstr.c:113
#define NUM_STRAND_CHARS
Definition: secstr.c:89
#define PARLEL
Definition: secstr.c:102
Type definitions for maths.
#define SECSTR_IDX_BEND
Definition: secstr.c:141
#define BEND_SIZE
Definition: secstr.c:164
#define SECSTR_IDX_CHISGN
Definition: secstr.c:142
#define SECSTR_ERR_NOERR
Definition: secstr.h:83
char chain[blMAXCHAINLABEL]
Definition: pdb.h:321
#define NRULES
Definition: secstr.c:101
#define DIHED_PHI
Definition: secstr.c:123
#define BULGE_SIZE_S
Definition: secstr.c:151
#define PIH_CHUNK_SIZE
Definition: secstr.c:147
#define MAX_NUM_ANGLES
Definition: secstr.c:121
int blCalcSecStrucPDB(PDB *pdbStart, PDB *pdbStop, BOOL verbose)
Definition: secstr.c:315
REAL x
Definition: pdb.h:300
REAL y
Definition: pdb.h:300
#define MAX_CA_DISTANCE
Definition: secstr.c:162
#define DIHED_KAPPA
Definition: secstr.c:128
#define SECSTR_ERR_NOMEM
Definition: secstr.h:84
char insert[8]
Definition: pdb.h:320
#define BULGE_SIZE_L
Definition: secstr.c:150
#define SECSTR_BEND
Definition: secstr.h:68