Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
bird_pipeline_registry
microSysMics
Commits
0e00228a
Commit
0e00228a
authored
Feb 05, 2021
by
Erwan DELAGE
Browse files
Fix bug when exporting taxonomy with UNITE database
parent
ed19ffd7
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/format_taxo.py
View file @
0e00228a
...
...
@@ -6,19 +6,23 @@ if __name__ == "__main__":
# Load taxo file
TAXONOMY_FILE
=
sys
.
argv
[
1
]
taxo
=
pd
.
read_csv
(
TAXONOMY_FILE
,
sep
=
"
\t
"
,
index_col
=
0
)
try
:
try
:
taxo
.
drop
(
"#q2:types"
,
inplace
=
True
)
except
KeyError
:
except
KeyError
:
pass
# Associate taxorank with corresponding prefix letter
taxo
[
"Domain"
]
=
""
if
taxo
.
iloc
[
1
][
"Taxon"
].
startswith
(
"k__"
):
taxo
[
"Kingdom"
]
=
""
# Unite uses Kingdom rather than Domain
else
:
taxo
[
"Domain"
]
=
""
taxo
[
"Phylum"
]
=
""
taxo
[
"Class"
]
=
""
taxo
[
"Order"
]
=
""
taxo
[
"Family"
]
=
""
taxo
[
"Genus"
]
=
""
taxo
[
"Species"
]
=
""
taxorank_dict
=
{
"d"
:
"Domain"
,
"p"
:
"Phylum"
,
"c"
:
"Class"
,
taxorank_dict
=
{
"k"
:
"Kingdom"
,
"d"
:
"Domain"
,
"p"
:
"Phylum"
,
"c"
:
"Class"
,
"o"
:
"Order"
,
"f"
:
"Family"
,
"g"
:
"Genus"
,
"s"
:
"Species"
}
# Create a column for each taxorank
...
...
@@ -32,4 +36,4 @@ if __name__ == "__main__":
continue
# Export taxo
taxo
.
to_csv
(
sys
.
argv
[
1
],
sep
=
"
\t
"
)
taxo
.
to_csv
(
sys
.
argv
[
1
]
+
".tsv"
,
sep
=
"
\t
"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment