Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
ReadSimplePIR.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file ReadSimplePIR.c
5 
6  \version V2.9
7  \date 07.07.14
8  \brief Legacy reader for minimal-specification PIR sequence files
9 
10  \copyright (c) UCL / Dr. Andrew C. R. Martin 1991-2014
11  \author Dr. Andrew C. R. Martin
12  \par
13  Institute of Structural & Molecular Biology,
14  University College London,
15  Gower Street,
16  London.
17  WC1E 6BT.
18  \par
19  andrew@bioinf.org.uk
20  andrew.martin@ucl.ac.uk
21 
22 **************************************************************************
23 
24  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
25  according to the conditions laid out in the accompanying file
26  COPYING.DOC.
27 
28  The code may be modified as required, but any modifications must be
29  documented so that the person responsible can be identified.
30 
31  The code may not be sold commercially or included as part of a
32  commercial product except as described in the file COPYING.DOC.
33 
34 **************************************************************************
35 
36  Description:
37  ============
38 
39 
40 **************************************************************************
41 
42  Usage:
43  ======
44 
45 \code
46  int ReadSimplePIR(FILE *fp, int maxres, char **seqs)
47 \endcode
48 
49  This version (previously called ReadPIR()) is maintained only for
50  compatibility with the old version. It will only read minimal
51  specification PIR files.
52 
53 **************************************************************************
54 
55  Revision History:
56  =================
57 - V1.0 01.06.92 Original
58 - V2.0 08.03.94 Changed name of ReadPIR() to ReadSimplePIR()
59  Added new ReadPIR().
60 - V2.1 18.03.94 getc() -> fgetc()
61 - V2.2 11.05.94 Changes to ReadPIR() for better compatibility with
62  PIR V38.0 and V39.0
63 - V2.3 28.02.95 Added ReadRawPIR()
64 - V2.4 13.03.95 Fixed bug in reading text lines in ReadRawPIR()
65 - V2.5 26.07.95 Removed unused variables
66 - V2.6 30.10.95 Cosmetic
67 - V2.7 06.02.96 Removes trailing spaces from comment line
68 - V2.8 18.06.02 Added string.h
69 - V2.9 07.07.14 Use bl prefix for functions By: CTP
70 
71 *************************************************************************/
72 /* Doxygen
73  -------
74  #GROUP Handling Sequence Data
75  #SUBGROUP File IO
76  #FUNCTION blReadSimplePIR()
77  Read a PIR file containing multiple chains of up to maxres amino acids.
78  Doesn't handle special PIR characters
79 */
80 /************************************************************************/
81 /* Includes
82 */
83 #include <ctype.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 
88 /************************************************************************/
89 /* Defines and macros
90 */
91 
92 /************************************************************************/
93 /* Globals
94 */
95 
96 /************************************************************************/
97 /* Prototypes
98 */
99 
100 
/************************************************************************/
/*>static void SkipLine(FILE *fp)
   ------------------------------
*//**

   \param[in]     *fp      File pointer

   Consumes characters from fp up to and including the next newline,
   or up to end of file, whichever comes first. Unlike a fixed-size
   fgets() this discards the whole line however long it is.
*/
static void SkipLine(FILE *fp)
{
   int ch;

   do
   {
      ch = fgetc(fp);
   }  while((ch != EOF) && (ch != '\n'));
}


/************************************************************************/
/*>static int StoreChain(char **seqs, int chain, char *buffer,
                         int rescount)
   -----------------------------------------------------------
*//**

   \param[out]    **seqs     Array of pointers to sequences
   \param[in]     chain      Index in seqs[] to fill
   \param[in,out] *buffer    Residues read so far (gets terminated)
   \param[in]     rescount   Number of residues in buffer
   \return                   1 on success, 0 if allocation failed

   Terminates buffer after rescount characters and stores a freshly
   allocated copy of it in seqs[chain].
*/
static int StoreChain(char **seqs, int chain, char *buffer, int rescount)
{
   buffer[rescount] = '\0';

   /* Allocation size (rescount+2) is kept the same as the original
      code so callers see buffers of the same size as before
   */
   seqs[chain] = (char *)malloc(((size_t)rescount + 2) * sizeof(char));
   if(seqs[chain] == NULL)
      return(0);

   /* Copy the residues together with the terminator                    */
   memcpy(seqs[chain], buffer, (size_t)rescount + 1);

   return(1);
}


/************************************************************************/
/*>static void FreeChains(char **seqs, int nchains)
   ------------------------------------------------
*//**

   \param[in,out] **seqs     Array of pointers to sequences
   \param[in]     nchains    Number of entries that have been allocated

   Frees seqs[0] to seqs[nchains-1] and resets each entry to NULL so the
   caller is not left holding dangling pointers.
*/
static void FreeChains(char **seqs, int nchains)
{
   int i;

   for(i=0; i<nchains; i++)
   {
      free(seqs[i]);
      seqs[i] = NULL;
   }
}


/************************************************************************/
/*>int blReadSimplePIR(FILE *fp, int maxres, char **seqs)
   ------------------------------------------------------
*//**

   \param[in]     *fp      File pointer
   \param[in]     maxres   Max number of residues in chain.
   \param[out]    **seqs   Array of pointers to sequences
   \return                 Number of chains. 0 if error

   Read a PIR file containing multiple chains of up to maxres amino acids.
   The first two lines of the file are skipped as header lines. After
   that every alphabetic character is stored (converted to upper case),
   a '*' ends the current chain and all other characters are ignored.
   Each chain is returned in seqs[] as a separately allocated string;
   a final chain that is not terminated by '*' is also returned.
   The number of chains is returned by the routine.

   Reading stops once a chain reaches maxres residues; that chain is
   stored truncated and the rest of the file is not read.

   0 is returned if a memory allocation failed (any chains already
   stored are freed and their seqs[] entries set to NULL), or if
   fp or seqs is NULL or maxres is less than 1.

   The routine cannot check the size of seqs[]: the caller must supply
   an array with room for every chain in the file.

-  01.06.91 Original
-  03.03.94 Added check on case before toupper(). Changed name.
-  18.03.94 Changed getc() to fgetc()
-  07.07.14 Use bl prefix for functions By: CTP
-  (review) Header lines are now skipped in full whatever their length.
            All allocation failures return 0 and release memory.
            Characters are classified using the int from fgetc().
*/
int blReadSimplePIR(FILE *fp,
                    int  maxres,
                    char **seqs)
{
   char *buffer;
   int  rescount = 0,
        chain    = 0,
        ch;

   /* Reject arguments that cannot yield any sequence data              */
   if((fp == NULL) || (seqs == NULL) || (maxres < 1))
      return(0);

   /* Allocate space for the sequence (plus terminator)                 */
   buffer = (char *)malloc(((size_t)maxres + 1) * sizeof(char));
   if(buffer == NULL)
      return(0);

   /* Discard the two header lines. They are skipped character by
      character so a header longer than maxres does not spill over
      into the sequence data.
   */
   SkipLine(fp);
   SkipLine(fp);

   /* Now loop through to get the sequence                              */
   while((rescount < maxres) && ((ch = fgetc(fp)) != EOF))
   {
      /* ch is the int from fgetc() (an unsigned char value) so it is
         always a valid argument for the ctype functions
      */
      if(isalpha(ch))
      {
         buffer[rescount++] = (char)toupper(ch);
      }
      else if(ch == '*')
      {
         /* A star marks the end of a chain, so copy the chain          */
         if(!StoreChain(seqs, chain, buffer, rescount))
         {
            FreeChains(seqs, chain);
            free(buffer);
            return(0);
         }
         chain++;
         rescount = 0;
      }
   }

   /* Check to see if the last chain ended without a *                  */
   if(rescount)
   {
      if(!StoreChain(seqs, chain, buffer, rescount))
      {
         FreeChains(seqs, chain);
         free(buffer);
         return(0);
      }
      chain++;
   }

   free(buffer);

   return(chain);
}
192 
193 
int blReadSimplePIR(FILE *fp, int maxres, char **seqs)