#!/usr/bin/env python3
"""
Program:    pdb2sprot
File:       pdb2sprot.py

Version:    V1.0
Date:       01.03.18
Function:   Obtain SwissProt information for a PDB code and residue
            number using PDBSWS

Copyright:  (c) Dr. Andrew C. R. Martin, UCL, 2018
Author:     Dr. Andrew C. R. Martin
Address:    Institute of Structural and Molecular Biology
            Division of Biosciences
            University College London

--------------------------------------------------------------------------

This program is released under the GNU Public Licence (GPL V3)

--------------------------------------------------------------------------
Description:
============
This program takes a PDB code and residue identifier and returns the
relevant UniProtKB/SwissProt (or trEMBL) accession together with the
residue number in the sequence entry

--------------------------------------------------------------------------
Usage:
======
pdb2sprot PDBID RESID

--------------------------------------------------------------------------
Revision History:
=================
V1.0   01.03.18  Original   By: ACRM
"""

#*************************************************************************
# Import libraries
from urllib import request
import sys
import re
import config

#*************************************************************************
# Patterns are applied to the PDBSWS reply after its newlines have been
# replaced by '#', so each field value is terminated by a '#'.
# The leading greedy '.*' means the LAST occurrence of a tag is the one
# that is captured.
_AC_PATTERN      = re.compile(r'.*AC:\s+(.*?)#')
_UPCOUNT_PATTERN = re.compile(r'.*UPCOUNT:\s+(.*?)#')

# Residue identifier: chain letters, residue number, optional insert
# letters   e.g.  A 23 C
_RESID_PATTERN   = re.compile(r'([a-zA-Z]+)([0-9]+[a-zA-Z]*)')


#*************************************************************************
def ParseRST(result):
    """Parse the results from PDBSWS

    Input:  result       --- The result returned by PDBSWS, with each
                             newline already replaced by a '#'
    Return: (ac, resnum) --- A tuple containing the UniProt accession
                             code (string) and the UniProt residue
                             number (int). An empty list is returned if
                             the AC or UPCOUNT field is absent, or if
                             the UPCOUNT value is not an integer.

    01.03.18 Original   By: ACRM
    """

    ac_match      = _AC_PATTERN.match(result)
    upcount_match = _UPCOUNT_PATTERN.match(result)

    # A reply lacking either field cannot be interpreted - report this
    # using the same 'blank list' convention as ReadPDBSWS()
    if ac_match is None or upcount_match is None:
        return []

    try:
        resnum = int(upcount_match.group(1))
    except ValueError:
        return []

    return (ac_match.group(1), resnum)


#*************************************************************************
def ReadPDBSWS(pdbcode, resid):
    """Obtain the UniProt accession, and residue number from PDBSWS
    given a PDB code, and residue identifier

    Input:  pdbcode      --- A PDB code (e.g. 8cat)
            resid        --- A residue identifier (e.g. A23)
    Return: (ac, resnum) --- A tuple containing the UniProt accession
                             code and the UniProt residue number. An
                             empty list is returned if the residue
                             identifier is malformed, if PDBSWS returns
                             nothing, or if the reply cannot be parsed.

    Any exception raised by urlopen() or by decoding the reply as UTF-8
    is passed on to the caller.

    01.03.18 Original   By: ACRM
    """

    # Obtain the chain and residue number from the resid
    # e.g. A 23 C
    match = _RESID_PATTERN.match(resid)
    if match is None:
        return []
    chain, resnum = match.groups()

    # The parameters are appended with '&', so config.baseurl is
    # presumed to contain the start of the query string already
    url  = config.baseurl
    url += '&id='    + pdbcode
    url += '&chain=' + chain
    url += '&res='   + resnum

    # Using 'with' ensures the connection is closed once the reply
    # has been read
    with request.urlopen(url) as response:
        result = response.read().decode('utf-8')
    result = result.replace('\n', '#')

    # Failure - return a blank list
    if result == '':
        return []

    # Success - parse and return the result
    return ParseRST(result)


#*************************************************************************
def UsageDie():
    """Print a usage message and exit

    01.03.18 Original   By: ACRM
    """
    print(
"""
pdb2sprot V1.0 (c) 2018 UCL, Dr. Andrew C.R. Martin

Usage: pdb2sprot pdbcode resid

Obtain the UniProtKB/SwissProt accession and residue number for a
PDB code (e.g. 8cat) and residue identifier (e.g. A23)
""")
    # sys.exit() is used rather than exit() since the latter is only
    # provided by the site module
    sys.exit(0)


#*************************************************************************
def main():
    """Read the PDB code and residue identifier from the command line,
    query PDBSWS, and print the accession and UniProt residue number
    (or a 'not found' message)
    """
    if (len(sys.argv) < 3) or (sys.argv[1] == '-h'):
        UsageDie()

    pdbcode = sys.argv[1]
    resid   = sys.argv[2]

    results = ReadPDBSWS(pdbcode, resid)
    if results:
        ac, upresnum = results
        print('Accession: ' + ac)
        print('UniProt Resnum: ' + str(upresnum))
    else:
        print('PDB code or residue identifier not found.')


#*************************************************************************
#*** Main program                                                      ***
#*************************************************************************
if __name__ == '__main__':
    main()