03. January 2021
CCTOP
transmembrane domain prediction
direct interface
python cctop_template.py -i tm.fa -s yes -f yes
Python 2 script downloaded from the CCTOP website:
cctop_template.py
#!/usr/bin/python
from __future__ import division

import os
import subprocess
import sys
import time
from os import popen
from os import system

import lxml.etree as ET
from lxml import etree
# Make stdout unbuffered so results show up immediately when piped.
# Python 3 refuses unbuffered text streams (os.fdopen(..., 'w', 0) raises
# ValueError), so the rebinding is only done on Python 2; on Python 3 the
# interpreter's default stdout is left untouched.
if sys.version_info[0] < 3:
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

# Base URL of the CCTOP web service; the script appends "submit.php",
# "poll.php?jobId=..." and "result.php?jobId=..." to it.
URL = "http://cctop.enzim.ttk.mta.hu/php/"
def msg1(line):
    """Write the command-line usage text to stderr.

    Args:
        line: The argument vector.  It is accepted so existing callers keep
            working but is not used; the program name shown in the text is
            taken from ``sys.argv[0]``.
    """
    prog = str(sys.argv[0])
    usage = (
        "'" + prog + " -i fasta file' or '" + prog
        + " -i fasta file [signal peptide] [tmfilter]'",
        "Example: " + prog + " -i /home/user/protein.fas -s yes -f no",
        "Mandatory option",
        "-i filename or --input=filename: input sequences in fasta format",
        "Other options:",
        "-o filename or --output=filename: path to output results file, if omitted stdout is used",
        "-s yes or --signal=yes: switch on signal peptide prediction (default is no signal peptide prediction)",
        "-f yes or --tmfilter=yes: switch on tmfilter discrimination (default is no tmfilter discrimination)",
        "After submission, the script will check for results every 30 sec.",
        "Status changes are displayed.",
    )
    for text in usage:
        sys.stderr.write(text + "\n")
def getargs(line):
    """Parse the command line and open the input (and optional output) file.

    Recognised options, scanned over every element of ``line``:

    * ``-i FILE`` / ``--input=FILE``    FASTA input (mandatory)
    * ``-o FILE`` / ``--output=FILE``   result file (stdout when omitted)
    * ``-s yes|no`` / ``--signal=yes|no``
    * ``-f yes|no`` / ``--tmfilter=yes|no``

    As before, an argument is treated as a short option when its first two
    characters match (``-i``, ``-s``, ``-f``, ``-o``) and as a long option
    when it starts with the long name.  A long option may carry its value
    after ``=`` or, if it has no ``=``, in the following argument.  An
    argument consumed as a value is not examined as an option again.
    Unknown arguments are ignored.

    Args:
        line: Argument vector, normally ``sys.argv``.

    Returns:
        ``[fastafile, signal, tmfilter, outfile]`` where ``fastafile`` is the
        FASTA file opened for reading, ``signal`` and ``tmfilter`` are
        booleans, and ``outfile`` is either a file opened for writing or
        ``""`` when results go to stdout.  On any error a message is written
        to stderr and ``["", False, False, "none"]`` is returned.
    """
    def usage_error():
        # Every usage problem prints the help text and yields the same
        # sentinel the caller tests for.
        msg1(line)
        return ["", False, False, "none"]

    short_options = {"-i": "input", "-s": "signal",
                     "-f": "tmfilter", "-o": "output"}
    long_options = (("--input", "input"), ("--signal", "signal"),
                    ("--tmfilter", "tmfilter"), ("--output", "output"))
    values = {"input": "", "signal": "no", "tmfilter": "no", "output": ""}

    pos = 0
    while pos < len(line):
        arg = line[pos]
        pos += 1
        target = short_options.get(arg[:2])
        inline = None
        if target is None:
            for prefix, name in long_options:
                if arg.startswith(prefix):
                    target = name
                    if "=" in arg:
                        # --name=value carries its own value, so it is valid
                        # even as the very last argument.
                        inline = arg.split("=", 1)[1]
                    break
        if target is None:
            continue
        if inline is not None:
            values[target] = inline
        elif pos < len(line):
            values[target] = line[pos]
            pos += 1  # the value must not be re-read as an option
        else:
            return usage_error()

    loc = values["input"]
    out = values["output"]
    if loc == "":
        return usage_error()

    # Validate the yes/no switches before touching the file system, so a bad
    # switch neither truncates the output file nor leaks open handles.
    flags = []
    for name in ("signal", "tmfilter"):
        choice = values[name].lower()
        if choice not in ("yes", "no"):
            return usage_error()
        flags.append(choice == "yes")
    signal, tmfilter = flags

    try:
        fastafile = open(loc, "r")
    except IOError:
        sys.stderr.write("Invalid path, check fasta file location (use full path)" + "\n")
        sys.stderr.write(str(loc) + "\n")
        return ["", False, False, "none"]

    outfile = ""
    if out != "":
        try:
            outfile = open(out, "w")
        except IOError:
            sys.stderr.write("Invalid path, check output file location (use full path) or permissions" + "\n")
            sys.stderr.write(str(out) + "\n")
            fastafile.close()
            return ["", False, False, "none"]

    sys.stderr.write("Signal peptide prediction " + ("ON" if signal else "OFF") + "\n")
    sys.stderr.write("TMFilter " + ("ON" if tmfilter else "OFF") + "\n")
    return [fastafile, signal, tmfilter, outfile]
def _fetch_url(url):
    """Return the raw bytes ``wget`` downloads from *url* (empty if none)."""
    # An argument list (no shell) keeps the URL from being interpreted by a
    # shell, and communicate() waits for and reaps the child process.
    proc = subprocess.Popen(["wget", "-q", "-O", "-", url],
                            stdout=subprocess.PIPE)
    return proc.communicate()[0]


def _discard(items, key):
    """Remove *key* from the list *items* when it is present."""
    if key in items:
        items.remove(key)


def get(xml, jobID, doneID, error, running, scheduled):
    """Poll the server once for every job that is not yet done or failed.

    For each pending job the ``poll.php`` reply is searched
    (case-insensitively) for a status word:

    * ``finished`` - ``result.php`` is downloaded and parsed with
      ``etree.fromstring``; the document is appended to ``xml`` and the key
      to ``doneID``.  If the result cannot be parsed the key is recorded in
      ``error`` instead of aborting the run.
    * ``invalid`` / ``error`` - the key is recorded in ``error``.
    * ``running`` - the key is recorded in ``running``.
    * ``scheduled`` - the key is recorded in ``scheduled``.

    A key is stored at most once and lives in only one of the four state
    lists, so the callers' ``len()`` based bookkeeping stays accurate.

    Args:
        xml: List collecting parsed result documents.
        jobID: Mapping of sequence header to server job id.
        doneID, error, running, scheduled: Lists of sequence headers in the
            respective state.

    Returns:
        ``[xml, doneID, error, running, scheduled]`` - the same list objects
        that were passed in, updated in place.
    """
    for key in jobID:
        if key in doneID or key in error:
            continue  # terminal state reached earlier, nothing to ask

        reply = _fetch_url(URL + "poll.php?jobId=" + jobID[key])
        reply = reply.decode("utf-8", "replace").lower()

        if "finished" in reply:
            raw = _fetch_url(URL + "result.php?jobId=" + jobID[key])
            try:
                # Bytes are passed on purpose: lxml rejects text strings that
                # carry an XML encoding declaration.
                doc = etree.fromstring(raw)
            except (etree.XMLSyntaxError, ValueError):
                error.append(key)
            else:
                xml.append(doc)
                doneID.append(key)
            _discard(scheduled, key)
            _discard(running, key)
        elif "invalid" in reply or "error" in reply:
            error.append(key)
            _discard(scheduled, key)
            _discard(running, key)
        elif "running" in reply:
            if key not in running:
                running.append(key)
            _discard(scheduled, key)
        elif "scheduled" in reply:
            if key not in scheduled:
                scheduled.append(key)
            _discard(running, key)

        # Short pause between requests so the server is not flooded.
        time.sleep(0.5)
    return [xml, doneID, error, running, scheduled]
def _post_form(url, data):
    """POST *data* to *url* with ``wget`` and return the reply as text lines."""
    # No shell is involved, so quotes or other shell metacharacters coming
    # from FASTA headers cannot break or alter the command.
    proc = subprocess.Popen(
        ["wget", "-q", "-O", "-", "--post-data=" + data, url],
        stdout=subprocess.PIPE, universal_newlines=True)
    return proc.communicate()[0].splitlines()


def submit(sequences, signal, tmfilter):
    """Submit every sequence to the server and collect the returned job ids.

    The form field ``sequence`` holds one FASTA record (``>header``, newline,
    residues), percent-encoded so characters such as ``&``, ``+``, ``%`` or
    ``'`` in a header are transmitted literally.  ``&tmFilter=`` and/or
    ``&signalPred=`` are appended when the matching switch is on.

    Args:
        sequences: Mapping of FASTA header to sequence string.
        signal: Truthy to request signal peptide prediction.
        tmfilter: Truthy to request TMFilter discrimination.

    Returns:
        ``[jobID, failures]`` where ``jobID`` maps header to job id and
        ``failures`` counts the sequences for which no job was created
        (rejected by the server, or no reply received).  The affected
        headers are listed on stderr.
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    options = ""
    if tmfilter:
        options += "&tmFilter="
    if signal:
        options += "&signalPred="

    jobID = {}
    fail = []
    unanswered = []
    for key in sequences:
        record = ">" + key + "\n" + sequences[key]
        content = _post_form(URL + "submit.php",
                             "sequence=" + quote(record, safe="") + options)
        if not content:
            # Empty reply (network or server problem): nothing to index.
            unanswered.append(key)
        elif content[0].find("Invalid sequence") != -1:
            fail.append(key)
        else:
            jobID[key] = content[0].replace("ID: ", "").strip()
        time.sleep(1)  # throttle consecutive submissions

    if fail:
        sys.stderr.write("invalid sequence format, use 20 letter amino acid code" + "\n")
        for header in fail:
            sys.stderr.write(header + "\n")
    if unanswered:
        sys.stderr.write("no response from server for:" + "\n")
        for header in unanswered:
            sys.stderr.write(header + "\n")
    return [jobID, len(fail) + len(unanswered)]
def _store_record(sequences, header, chunks):
    """Add one FASTA record to *sequences* if it has a header and residues."""
    residues = "".join(chunks)
    if header == "":
        return  # text before the first '>' line belongs to no record
    if residues == "":
        sys.stderr.write("Skipping FASTA record without sequence: " + header + "\n")
        return
    sequences[header] = residues


def read(fastafile):
    """Parse an open FASTA file into a ``{header: sequence}`` dict.

    A line whose first character is ``>`` starts a record; the rest of that
    line, stripped, is the header.  All following lines up to the next
    header are concatenated with every whitespace character removed.
    Records lacking a header or residues are skipped (the latter with a
    note on stderr), so an empty file yields an empty dict.  If a header
    occurs more than once the last record wins.

    Args:
        fastafile: File-like object opened for reading text.  It is always
            closed before returning, even when reading fails.

    Returns:
        Dict mapping each header to its sequence string.
    """
    sequences = {}
    header = ""
    chunks = []
    try:
        for line in fastafile:
            if line[0] == ">":
                _store_record(sequences, header, chunks)
                header = line[1:].strip()
                # Always start from scratch so residues never leak from one
                # record (or from leading junk) into the next.
                chunks = []
            else:
                chunks.append("".join(line.split()))
        _store_record(sequences, header, chunks)
    finally:
        fastafile.close()
    return sequences
# Parsed result documents collected while the script runs.
xml = []


def _write_status(title, doneID, running, scheduled, failures, footer):
    """Write one status report (counts per state) to stderr."""
    sys.stderr.write(title + "\n")
    sys.stderr.write(str(len(doneID)) + " job is done" + "\n")
    sys.stderr.write(str(len(running)) + " job is running" + "\n")
    sys.stderr.write(str(len(scheduled)) + " job is scheduled" + "\n")
    if failures != 0:
        sys.stderr.write("Error in " + str(failures) + " cases" + "\n")
    sys.stderr.write(footer + "\n")


def _serialize(doc):
    """Return *doc* pretty-printed as a native ``str`` without outer whitespace."""
    text = ET.tostring(doc, pretty_print=True).strip()
    if not isinstance(text, str):
        # Python 3: tostring() returns bytes.
        text = text.decode("utf-8")
    return text


def main(argv=None):
    """Run the client: read FASTA, submit jobs, poll, and emit the results.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Results are wrapped in a ``<CCTOPItems>`` element.  They go to the file
    given with ``-o``/``--output``, otherwise to stdout (where nothing is
    printed if no result was obtained).  Progress is reported on stderr.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2 or argv[1].lower() in ("help", "-h", "-help"):
        msg1(argv)
        return

    fastafile, signal, tmfilter, outfile = getargs(argv)
    results = xml
    if fastafile != "" and outfile != "none":
        sys.stderr.write("Reading fasta file" + "\n")
        sequences = read(fastafile)
        sys.stderr.write("Submitting sequences" + "\n")
        jobID, fail = submit(sequences, signal, tmfilter)
        doneID = []
        error = []
        running = []
        scheduled = []
        if len(jobID) > 0:
            time.sleep(10)  # give the server a head start before polling
            previous = None
            while True:
                sys.stderr.write("Checking results" + "\n")
                results, doneID, error, running, scheduled = get(
                    results, jobID, doneID, error, running, scheduled)
                # Count each failed job once, even if it was recorded twice.
                failures = len(set(error)) + fail
                # Finished when every submitted job reached a terminal
                # state; membership is tested instead of comparing list
                # lengths so duplicate entries cannot end the loop early
                # or keep it running forever.
                if all(key in doneID or key in error for key in jobID):
                    _write_status("Status:", doneID, running, scheduled,
                                  failures, "Done")
                    break
                state = [len(doneID), len(running), len(scheduled), failures]
                if state != previous:
                    _write_status("status:", doneID, running, scheduled,
                                  failures, "Please wait")
                else:
                    sys.stderr.write("No change in status" + "\n")
                previous = state
                time.sleep(30)
        else:
            sys.stderr.write("done" + "\n")

    if outfile == "":
        if len(results) > 0:
            sys.stdout.write("<CCTOPItems>" + "\n")
            for doc in results:
                sys.stdout.write(_serialize(doc) + "\n")
            sys.stdout.write("</CCTOPItems>" + "\n")
    elif outfile != "none":
        try:
            outfile.write("<CCTOPItems>" + "\n")
            for doc in results:
                outfile.write(_serialize(doc))
                outfile.write("\n")
            outfile.write("</CCTOPItems>" + "\n")
        finally:
            outfile.close()


if __name__ == "__main__":
    main()
Step 1. Prerequisites: install the HMMTOP C library and its dependencies (for details see http://hmmtop.enzim.hu). Get hmmtop from http://www.enzim.hu/hmmtop and add it to $PATH.
Step 2. Building the core library: `cd Lib`, then `mkdir build && cd build`, then `ml CMake`, then `cmake ..`, then `make`.
Step 3. Building CCTOP: `cd Standalone`, then `mkdir build && cd build`, then `cmake ..`, then `make`.
References: http://www.enzim.hu/hmmtop and http://cctop.enzim.ttk.mta.hu/