Commit 9d604c6c authored by Eric CHARPENTIER 🐍
Browse files

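Updated `make_ref` to store the gene symbol instead of the Ensembl gene ID (ENSG) in `sym2ref` when using Ensembl references; falls back to the ENSG ID when no gene symbol is present.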

parent f5e1eb50
...@@ -278,12 +278,21 @@ def processEnsembl(fastaString, fastaOut, annotOut): ...@@ -278,12 +278,21 @@ def processEnsembl(fastaString, fastaOut, annotOut):
# Split track name by ' '. Transcript ID is on 1st field, gene symbol is on 7th field. # Split track name by ' '. Transcript ID is on 1st field, gene symbol is on 7th field.
ls = line.decode("utf-8").split(" ") ls = line.decode("utf-8").split(" ")
fastaMod += ls[0]+"\n" fastaMod += ls[0]+"\n"
geneField = 1 geneSymbol = 1
enst = 1
found = False
for i in range(1,len(ls)): for i in range(1,len(ls)):
if(ls[i].startswith("gene:")): if(ls[i].startswith("gene:")):
geneField = i enst = i
continue
if(ls[i].startswith("gene_symbol:")):
geneSymbol = i
found = True
break break
geneName = ls[geneField].split(":")[1].rstrip('\n') if(not found):
geneName = ls[enst].split(":")[1].rstrip('\n')
else:
geneName = ls[geneSymbol].split(":")[1].rstrip('\n')
if(not geneName in gene2transcripts): if(not geneName in gene2transcripts):
gene2transcripts[geneName] = set() gene2transcripts[geneName] = set()
gene2transcripts[geneName].add(ls[0][1:]) gene2transcripts[geneName].add(ls[0][1:])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment