07. January 2021
HMMER
using HMMER with Pfam
hmmscan - search sequence(s) against a profile database
The following example uses the Pfam database cached on GACRC.
# Search the sequence(s) in test.fa against the Pfam-A profile database.
# Each of --tblout, --domtblout and --pfamtblout names a tabular report file
# written to the current directory.
hmmscan \
  --tblout test-tbl.tab \
  --domtblout test-domtbl.tab \
  --pfamtblout test-pfamtbl.tab \
  /db/pfam/27.0-3.1b1/Pfam-A.hmm \
  test.fa
Running HMMER on several proteomes
Store the path to each proteome in a variable, in a file called genome_path_var.sh. These proteome FASTA files were downloaded from JGI Phytozome.
#!/usr/bin/env bash
#
# genome_path_var.sh -- locations of the proteome FASTA files.
#
# Intended to be sourced by other scripts. Defines:
#   genomeList  space-separated string of the variable names below; a
#               consumer iterates over it (unquoted, so it word-splits) and
#               dereferences each name to obtain the FASTA path.
#   <name>      one variable per entry of genomeList, holding the path to
#               that proteome's protein FASTA file.
# Prints 'genome path loaded' to stdout once everything is defined.

genomeList='ppopv1 p717main p717alt pPtriNisquallyV4 pPdelWV94 pPtriStettler pSpurV5 pSpurFC pAtha pGmax pMtru pEgra pPper pSlyc pVvin pSbic pSita pOsat'

ppopv1='/work/cjtlab/popv1/Poptr1_1_GeneModels_GeneCatalog_frozen20080522_aa.fasta'
p717main='/work/cjtlab/717genomeV2/annotation/main.prelim.v2.1/PtremulaxPopulusalbav2.1p.primaryTrs.pep.fa'
p717alt='/work/cjtlab/717genomeV2/annotation/alt.prelim.v2.1/PtremulaxPopulusalbaaltv2.1.primaryTrs.pep.fa'
pPtriNisquallyV4='/work/cjtlab/NisquallyV4/annotation/Ptrichocarpav4.1g.primaryTrs.pep.fa'
pPdelWV94='/work/cjtlab/Pdelt/PdeltoidesWV94_445_v2.1/PdeltoidesWV94_445_v2.1.protein_primaryTranscriptOnly.fa'
pPtriStettler='/work/cjtlab/Database/Ptri_stettlerV1.1/annotation/PtrichocarpaStettler14_532_v1.1.protein.fa'
pSpurV5='/work/cjtlab/Spurpurea94006/Spurpurea_519_v5.1.protein.fa'
pSpurFC='/work/cjtlab/SpurpureaFC/annotation/SpurpureaFishCreek_518_v3.1.protein.fa'
pAtha='/work/cjtlab/Database/Atha/annotation/Athaliana_447_Araport11.protein.fa'
pGmax='/work/cjtlab/Database/Gmax/annotation/Gmax_275_Wm82.a2.v1.protein.fa'
pMtru='/work/cjtlab/Database/Mtru/annotation/Mtruncatula_285_Mt4.0v1.protein.fa'
pEgra='/work/cjtlab/Database/Egrandis/v2.0/annotation/Egrandis_297_v2.0.protein.fa'
pPper='/work/cjtlab/Database/Pper/annotation/Ppersica_298_v2.1.protein.fa'
pSlyc='/work/cjtlab/Database/Slycopersicum/ITAG3.2/annotation/Slycopersicum_514_ITAG3.2.protein.fa'
pVvin='/work/cjtlab/Database/Vvinifera/v2.1/annotation/Vvinifera_457_v2.1.protein.fa'
pSbic='/work/cjtlab/Database/Sbicolor/v3.1.1/annotation/Sbicolor_454_v3.1.1.protein.fa'
pSita='/work/cjtlab/Database/Sitalica/v2.2/annotation/Sitalica_312_v2.2.protein.fa'
pOsat='/work/cjtlab/Database/Osativa/v7.0/annotation/Osativa_323_v7.0.protein.fa'

# Confirmation for whoever sources this file that the variables are in scope.
echo 'genome path loaded'
Import those variables with source, then iterate over the list of variable names and run hmmscan on each proteome.
#!/usr/bin/env bash
#
# Run hmmscan against the Pfam-A profile database once per proteome named in
# genomeList. For each name three report files are written to the current
# directory: <name>-tbl.tab, <name>-domtbl.tab and <name>-pfamtbl.tab.

# Provides genomeList (space-separated variable names) plus one variable per
# name whose value is the path to that proteome's FASTA file. Nothing below
# can work without it, so stop if it cannot be loaded.
source /work/cjtlab/scripts/bash/genome_path_var.sh || exit 1

pfam_db='/db/pfam/27.0-3.1b1/Pfam-A.hmm'

# genomeList is a plain space-separated string, so it is deliberately left
# unquoted here to split it into one word per variable name.
for species in $genomeList; do
  # "$species" is the NAME of a variable (e.g. pAtha), while "${!species}"
  # (bash indirect expansion) is the CONTENT of that variable: the complete
  # path to the FASTA file. Indirect expansion avoids eval, which would
  # re-parse the entire command line.
  fasta="${!species:-}"
  if [[ -z "$fasta" ]]; then
    printf 'warning: no path defined for %s, skipping\n' "$species" >&2
    continue
  fi

  hmmscan \
    --tblout "${species}-tbl.tab" \
    --domtblout "${species}-domtbl.tab" \
    --pfamtblout "${species}-pfamtbl.tab" \
    "$pfam_db" \
    "$fasta"
done