139 #define CLEAR_BIOMOL(b) \
141 (b)->details = NULL; \
142 (b)->numBiomolecules = 0; \
143 (b)->biomolNumber = 0; \
144 (b)->authorUnit[0] = '\0'; \
145 (b)->softwareUnit[0] = '\0'; \
146 (b)->chains = NULL; \
183 char *header,
int maxheader,
184 char *date,
int maxdate,
185 char *pdbcode,
int maxcode)
192 for(i=0; i<maxheader; i++) header[i] =
'\0';
193 for(i=0; i<maxdate; i++) date[i] =
'\0';
194 for(i=0; i<maxcode; i++) pdbcode[i] =
'\0';
198 if(!strncmp(s->
string,
"HEADER", 6))
201 strncpy(header, s->
string+10,
MIN(40, maxheader));
203 strncpy(date, s->
string+50,
MIN( 9, maxdate));
204 strncpy(pdbcode, s->
string+62,
MIN( 4, maxcode));
237 if(!strncmp(s->
string,
"TITLE ", 6))
240 strcpy(buffer, s->
string);
289 if(!strncmp(s->
string, type, 6))
291 if(strstr(s->
string,
"MOL_ID:"))
320 char *data,
char *type,
char *field)
329 for(s=molidStart; s!=molidStop;
NEXT(s))
331 if(strncmp(s->
string, type, 6))
336 if(GotField && isdigit(s->
string[9]))
350 chp += strlen(field);
356 if(GotField && (chp !=
NULL))
367 if((chp=strchr(data,
';'))!=
NULL)
406 compnd->
chain[0] =
'\0';
409 compnd->
ec[0] =
'\0';
412 compnd->
other[0] =
'\0';
416 fprintf(stderr,
"DEBUG: Chain %s molid %d\n", chain, molid);
424 molidFirst = FindNextMolIDRecord(wpdb->
header,
"COMPND");
426 for(molidStart=molidFirst; molidStart!=
NULL; molidStart=molidStop)
431 molidStop = FindNextMolIDRecord(molidStart,
"COMPND");
433 ExtractField(molidStart, molidStop,
434 buffer,
"COMPND",
"MOL_ID:");
435 sscanf(buffer,
"%d", &thisMolid);
437 if(thisMolid == molid)
439 ExtractField(molidStart, molidStop,
440 compnd->
molecule,
"COMPND",
"MOLECULE:");
441 ExtractField(molidStart, molidStop,
442 compnd->
chain,
"COMPND",
"CHAIN:");
443 ExtractField(molidStart, molidStop,
444 compnd->
fragment,
"COMPND",
"FRAGMENT:");
445 ExtractField(molidStart, molidStop,
446 compnd->
synonym,
"COMPND",
"SYNONYM:");
447 ExtractField(molidStart, molidStop,
448 compnd->
ec,
"COMPND",
"EC:");
449 ExtractField(molidStart, molidStop,
451 ExtractField(molidStart, molidStop,
452 compnd->
mutation,
"COMPND",
"MUTATION:");
453 ExtractField(molidStart, molidStop,
454 compnd->
other,
"COMPND",
"OTHER:");
455 ExtractField(molidStart, molidStop,
456 buffer,
"COMPND",
"MOL_ID:");
457 sscanf(buffer,
"%d", &(compnd->
molid));
487 molidFirst = FindNextMolIDRecord(wpdb->
header,
"COMPND");
489 for(molidStart=molidFirst; molidStart!=
NULL; molidStart=molidStop)
495 molidStop = FindNextMolIDRecord(molidStart,
"COMPND");
496 ExtractField(molidStart, molidStop, buffer,
"COMPND",
"CHAIN:");
505 if(!strcmp(word, chain))
507 ExtractField(molidStart, molidStop,
508 buffer,
"COMPND",
"MOL_ID:");
509 sscanf(buffer,
"%d", &molid);
551 molidFirst = FindNextMolIDRecord(wpdb->
header,
"SOURCE");
553 for(molidStart=molidFirst; molidStart!=
NULL; molidStart=molidStop)
555 molidStop = FindNextMolIDRecord(molidStart,
"SOURCE");
556 for(s=molidStart; s!=molidStop;
NEXT(s))
561 ExtractField(molidStart, molidStop, buffer,
562 "SOURCE",
"MOL_ID:");
563 sscanf(buffer,
"%d", &thisMolid);
565 if(thisMolid == molid)
568 "SOURCE",
"ORGANISM_SCIENTIFIC:");
569 ExtractField(molidStart, molidStop, source->
commonName,
570 "SOURCE",
"ORGANISM_COMMON:");
571 ExtractField(molidStart, molidStop, source->
strain,
572 "SOURCE",
"STRAIN:");
573 ExtractField(molidStart, molidStop, buffer,
574 "SOURCE",
"ORGANISM_TAXID:");
575 sscanf(buffer,
"%d",&source->
taxid);
611 compnd->
chain[0] =
'\0';
614 compnd->
ec[0] =
'\0';
617 compnd->
other[0] =
'\0';
620 molidFirst = FindNextMolIDRecord(wpdb->
header,
"COMPND");
623 for(molidStart=molidFirst; molidStart!=
NULL; molidStart=molidStop)
625 molidStop = FindNextMolIDRecord(molidStart,
"COMPND");
626 for(s=molidStart; s!=molidStop;
NEXT(s))
631 ExtractField(molidStart, molidStop,
632 buffer,
"COMPND",
"MOL_ID:");
633 sscanf(buffer,
"%d", &thisMolid);
635 if(thisMolid == molid)
637 ExtractField(molidStart, molidStop,
638 compnd->
molecule,
"COMPND",
"MOLECULE:");
639 ExtractField(molidStart, molidStop,
640 compnd->
chain,
"COMPND",
"CHAIN:");
641 ExtractField(molidStart, molidStop,
642 compnd->
fragment,
"COMPND",
"FRAGMENT:");
643 ExtractField(molidStart, molidStop,
644 compnd->
synonym,
"COMPND",
"SYNONYM:");
645 ExtractField(molidStart, molidStop,
646 compnd->
ec,
"COMPND",
"EC:");
647 ExtractField(molidStart, molidStop,
649 ExtractField(molidStart, molidStop,
650 compnd->
mutation,
"COMPND",
"MUTATION:");
651 ExtractField(molidStart, molidStop,
652 compnd->
other,
"COMPND",
"OTHER_DETAILS:");
653 ExtractField(molidStart, molidStop,
654 buffer,
"COMPND",
"MOL_ID:");
655 sscanf(buffer,
"%d", &(compnd->
molid));
694 molidFirst = FindNextMolIDRecord(wpdb->
header,
"SOURCE");
698 for(molidStart=molidFirst; molidStart!=
NULL; molidStart=molidStop)
700 molidStop = FindNextMolIDRecord(molidStart,
"SOURCE");
701 for(s=molidStart; s!=molidStop;
NEXT(s))
706 ExtractField(molidStart, molidStop, buffer,
707 "SOURCE",
"MOL_ID:");
708 sscanf(buffer,
"%d", &thisMolid);
710 if(thisMolid == molid)
713 "SOURCE",
"ORGANISM_SCIENTIFIC:");
714 ExtractField(molidStart, molidStop, source->
commonName,
715 "SOURCE",
"ORGANISM_COMMON:");
716 ExtractField(molidStart, molidStop, source->
strain,
717 "SOURCE",
"STRAIN:");
718 ExtractField(molidStart, molidStop, buffer,
719 "SOURCE",
"ORGANISM_TAXID:");
720 sscanf(buffer,
"%d",&source->
taxid);
762 static char *sequence =
NULL;
776 if((sequence=(
char *)malloc(ArraySize *
sizeof(
char)))==
NULL)
786 if(!strncmp(buffer,
"SEQRES",6))
789 "%11x%1s%7x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x\
790 %3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s",
792 seq3[0], seq3[1], seq3[2], seq3[3], seq3[4],
793 seq3[5], seq3[6], seq3[7], seq3[8], seq3[9],
794 seq3[10], seq3[11], seq3[12]);
796 if((nres == 0) && !AddStar)
799 strcpy(lastchain, chain);
803 else if(nres+15 >= ArraySize)
807 if((sequence=(
char *)realloc((
void *)sequence,
808 ArraySize*
sizeof(
char)))
817 sequence[nres++] =
'*';
818 strcpy(lastchain, chain);
826 if(!strncmp(seq3[i],
" ",3))
831 if(sequence[nres] ==
'X')
848 if(!strlen(sequence))
857 sequence[nres++] =
'*';
859 sequence[nres++] =
'\0';
861 chains[nchain][0] =
'\0';
892 if(!strncmp(s->
string,
"MODRES", 6))
905 fprintf(stderr,
"pdb2pir: Error! No memory for modres\n");
910 strncpy(m->modres, ch, 3);
914 strncpy(m->origres, ch, 3);
916 if(m->origres[0] ==
' ')
918 strncpy(m->origres,
"XXX ", 4);
945 if(!strncmp(modAA, m->
modres, 3))
976 int SkipStandardRemark = 0;
980 if(!strncmp(s->
string,
"REMARK 300", 10))
982 if(!strncmp(s->
string,
"REMARK 300 BIOMOLECULE:", 23))
988 if(biomolecule ==
NULL)
991 if(biomolecule ==
NULL)
996 SkipStandardRemark = 1;
999 strncpy(buffer, s->
string+24, 80);
1004 if((chp = strrchr(buffer,
' '))==
NULL)
1016 if(SkipStandardRemark)
1018 if(SkipStandardRemark++ > 5)
1022 strncpy(buffer, s->
string+11, 80);
1035 return(biomolecule);
1071 if(biomolecule !=
NULL)
1076 if(!strncmp(s->
string,
"REMARK 350", 10))
1082 if(biomolecule ==
NULL)
1085 if(biomolecule ==
NULL)
1092 "REMARK 350 BIOMOLECULE:", 23))
1108 firstRecord =
FALSE;
1110 else if(!strncmp(s->
string,
1111 "REMARK 350 AUTHOR DETERMINED", 28))
1117 else if(!strncmp(s->
string,
1118 "REMARK 350 SOFTWARE DETERMINED", 30))
1124 else if(!strncmp(s->
string,
1125 "REMARK 350 APPLY THE FOLLOWING TO CHAINS:",41))
1143 else if(!strncmp(s->
string,
1144 "REMARK 350 AND CHAINS:",41))
1155 else if(!strncmp(s->
string,
"REMARK 350 BIOMT", 18))
1162 strncpy(buffer, s->
string+18, 80);
1164 if(sscanf(buffer,
"%d %d %lf %lf %lf %lf",
1165 &line, &entry, &val[0], &val[1], &val[2], &val[3]))
1197 return(biomolecule);
1236 biomolecule = doRemark300(wpdb);
1237 biomolecule = doRemark350(wpdb, biomolecule);
1239 return(biomolecule);
1256 if(biomolecule ==
NULL)
1264 for(bm=biomolecule; bm!=
NULL;
NEXT(bm))
1303 static char *sequence =
NULL;
1320 if((sequence=(
char *)malloc(ArraySize *
sizeof(
char)))==
NULL)
1323 lastchain[0] =
'\0';
1330 if(!strncmp(buffer,
"SEQRES",6))
1333 "%11x%1s%7x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s%1x\
1334 %3s%1x%3s%1x%3s%1x%3s%1x%3s%1x%3s",
1336 seq3[0], seq3[1], seq3[2], seq3[3], seq3[4],
1337 seq3[5], seq3[6], seq3[7], seq3[8], seq3[9],
1338 seq3[10], seq3[11], seq3[12]);
1340 if(lastchain[0] ==
'\0')
1343 strcpy(lastchain, chain);
1346 if(nres+15 >= ArraySize)
1350 if((sequence=(
char *)realloc((
void *)sequence,
1351 ArraySize*
sizeof(
char)))
1361 sequence[nres++] =
'\0';
1370 strcpy(lastchain, chain);
1376 if(!strncmp(seq3[i],
" ",3))
1381 if(sequence[nres] ==
'X')
1408 sequence[nres++] =
'\0';
1420 int main(
int argc,
char **argv)
1437 if((in=fopen(argv[1],
"r"))!=
NULL)
1446 printf(
"Header: '%s'\n", header);
1447 printf(
"Date: '%s'\n", date);
1448 printf(
"PDB code: '%s'\n", pdbcode);
1453 printf(
"Title: '%s'\n", title);
1458 printf(
"Number of Biomolecules: %d\n",
1462 printf(
"REMARK 300 Details: %s\n", s->
string);
1465 for(bm=biomolecule; bm!=
NULL;
NEXT(bm))
1469 printf(
"Biomolecule: %d\n", bm->biomolNumber);
1470 printf(
" Author Unit: %s\n", bm->authorUnit);
1471 printf(
" Software Unit: %s\n", bm->softwareUnit);
1472 printf(
" Chains: %s\n",
1473 (bm->chains?bm->chains:
""));
1475 for(bmt=bm->biomt; bmt!=
NULL;
NEXT(bmt))
1479 printf(
" Matrix %d\n", bmt->
biomtNum);
1482 printf(
" %8.6f %8.6f %8.6f %8.6f\n",
1495 for(i=0; i<nChains; i++)
1499 printf(
"\n\n>>>Chain: %s\n", chainLabels[i]);
1503 printf(
"molid: %d\n", compound.
molid);
1504 printf(
"molecule: %s\n", compound.
molecule);
1505 printf(
"chain: %s\n", compound.
chain);
1506 printf(
"fragment: %s\n", compound.
fragment);
1507 printf(
"synonym: %s\n", compound.
synonym);
1508 printf(
"ec: %s\n", compound.
ec);
1509 printf(
"engineered: %s\n", compound.
engineered);
1510 printf(
"mutation: %s\n", compound.
mutation);
1511 printf(
"other: %s\n", compound.
other);
1516 printf(
"Common name: %s\n", species.
commonName);
1517 printf(
"Strain: %s\n", species.
strain);
1518 printf(
"Tax ID: %d\n", species.
taxid);
1521 free(chainLabels[i]);
1527 char **chains =
NULL;
1532 printf(
"\n\nSEQRES Sequence data:\n");
1534 for(i=0; chains[i]!=
NULL; i++)
1536 printf(
"Chain: %2s Seq: %s\n", chains[i],
int main(int argc, char **argv)
char strain[MAXPDBANNOTATION]
Include file for PDB routines.
#define FREESTRINGLIST(l)
char blThronex(char *three)
char fragment[MAXPDBANNOTATION]
char ** blGetHashKeyList(HASHTABLE *hashtable)
#define KILLTRAILSPACES(x)
char engineered[MAXPDBANNOTATION]
Defines for using hash functions.
char * blStrncat(char *out, const char *in, size_t len)
BOOL gBioplibSeqNucleicAcid
char * blGetHashValueString(HASHTABLE *hashtable, char *key)
BOOL blSetHashValueString(HASHTABLE *hashtable, char *key, char *value)
WHOLEPDB * blReadWholePDB(FILE *fpin)
char * blCollapseSpaces(char *inText)
int fsscanf(char *buffer, char *format,...)
HASHTABLE * blInitializeHash(ULONG hashsize)
char * blStrcatalloc(char *instr, char *catstr)
Header file for sequence handling.
char other[MAXPDBANNOTATION]
char commonName[MAXPDBANNOTATION]
void blFreeHashKeyList(char **keylist)
struct _stringlist * next
void blFreeHash(HASHTABLE *hashtable)
Include file for fsscanf()
char synonym[MAXPDBANNOTATION]
char * blGetWord(char *buffer, char *word, int maxsize)
char ec[MAXPDBANNOTATION]
char chain[MAXPDBANNOTATION]
STRINGLIST * blStoreString(STRINGLIST *StringList, char *string)
Header file for general purpose routines.
#define CHAINMATCH(chain1, chain2)
char molecule[MAXPDBANNOTATION]
#define STRNCPYNOSPACES(out, in, mx)
char ** blGetPDBChainLabels(PDB *pdb, int *nChains)
#define PADCHARMINTERM(s, c, l)
char scientificName[MAXPDBANNOTATION]
char mutation[MAXPDBANNOTATION]