-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
queries for certain ids, in this case two subgenera, returned themselves and species within those subgenera so the while loop never ended change ncbi_downstream to remove any ids from resulting searches that were in the search themselves also changed to after takig bind rows, then taking unique to remove any duplicate rows
- Loading branch information
Showing
8 changed files
with
6,508 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,233 @@ | ||
http_interactions: | ||
- request: | ||
method: get | ||
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=590&api_key=<<entrez_api_token>> | ||
body: | ||
encoding: '' | ||
string: '' | ||
headers: | ||
User-Agent: libcurl/7.64.1 r-curl/4.3 crul/0.9.0 | ||
Accept-Encoding: gzip, deflate | ||
Accept: application/json, text/xml, application/xml, */* | ||
response: | ||
status: | ||
status_code: '200' | ||
message: OK | ||
explanation: Request fulfilled, document follows | ||
headers: | ||
status: HTTP/1.1 200 OK | ||
date: Tue, 17 Mar 2020 23:06:21 GMT | ||
server: Finatra | ||
strict-transport-security: max-age=31536000; includeSubDomains; preload | ||
content-security-policy: upgrade-insecure-requests | ||
x-ratelimit-remaining: '7' | ||
ncbi-phid: 322C6F094A3EACC500002B2282BDADFD.1.1.m_3 | ||
cache-control: private | ||
ncbi-sid: 2F119B11C2C5B43F_8242SID | ||
content-encoding: gzip | ||
x-ratelimit-limit: '10' | ||
access-control-allow-origin: '*' | ||
content-type: text/xml; charset=UTF-8 | ||
set-cookie: ncbi_sid=2F119B11C2C5B43F_8242SID; domain=.nih.gov; path=/; expires=Wed, | ||
17 Mar 2021 23:06:22 GMT | ||
x-ua-compatible: IE=Edge | ||
x-xss-protection: 1; mode=block | ||
transfer-encoding: chunked | ||
body: | ||
encoding: UTF-8 | ||
file: no | ||
string: |- | ||
<?xml version="1.0" ?> | ||
<!DOCTYPE TaxaSet PUBLIC "-//NLM//DTD Taxon, 14th January 2002//EN" "https://www.ncbi.nlm.nih.gov/entrez/query/DTD/taxon.dtd"> | ||
<TaxaSet><Taxon> | ||
<TaxId>590</TaxId> | ||
<ScientificName>Salmonella</ScientificName> | ||
<OtherNames> | ||
<Name> | ||
<ClassCDE>authority</ClassCDE> | ||
<DispName>Salmonella Lignieres 1900</DispName> | ||
</Name> | ||
<Name> | ||
<ClassCDE>misspelling</ClassCDE> | ||
<DispName>Samonella</DispName> | ||
</Name> | ||
</OtherNames> | ||
<ParentTaxId>543</ParentTaxId> | ||
<Rank>genus</Rank> | ||
<Division>Bacteria</Division> | ||
<GeneticCode> | ||
<GCId>11</GCId> | ||
<GCName>Bacterial, Archaeal and Plant Plastid</GCName> | ||
</GeneticCode> | ||
<MitoGeneticCode> | ||
<MGCId>0</MGCId> | ||
<MGCName>Unspecified</MGCName> | ||
</MitoGeneticCode> | ||
<Lineage>cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae</Lineage> | ||
<LineageEx> | ||
<Taxon> | ||
<TaxId>131567</TaxId> | ||
<ScientificName>cellular organisms</ScientificName> | ||
<Rank>no rank</Rank> | ||
</Taxon> | ||
<Taxon> | ||
<TaxId>2</TaxId> | ||
<ScientificName>Bacteria</ScientificName> | ||
<Rank>superkingdom</Rank> | ||
</Taxon> | ||
<Taxon> | ||
<TaxId>1224</TaxId> | ||
<ScientificName>Proteobacteria</ScientificName> | ||
<Rank>phylum</Rank> | ||
</Taxon> | ||
<Taxon> | ||
<TaxId>1236</TaxId> | ||
<ScientificName>Gammaproteobacteria</ScientificName> | ||
<Rank>class</Rank> | ||
</Taxon> | ||
<Taxon> | ||
<TaxId>91347</TaxId> | ||
<ScientificName>Enterobacterales</ScientificName> | ||
<Rank>order</Rank> | ||
</Taxon> | ||
<Taxon> | ||
<TaxId>543</TaxId> | ||
<ScientificName>Enterobacteriaceae</ScientificName> | ||
<Rank>family</Rank> | ||
</Taxon> | ||
</LineageEx> | ||
<CreateDate>1995/02/27 09:24:00</CreateDate> | ||
<UpdateDate>2018/11/23 13:57:51</UpdateDate> | ||
<PubDate>1993/04/21 01:00:00</PubDate> | ||
</Taxon> | ||
</TaxaSet> | ||
recorded_at: 2020-03-17 23:06:23 GMT | ||
recorded_with: vcr/0.5.0.92, webmockr/0.6.0.92 | ||
- request: | ||
method: get | ||
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=taxonomy&term=Salmonella%5BNext%20Level%5D%20AND%20Enterobacteriaceae%5Bsubtree%5D&RetMax=1000&RetStart=0&api_key=<<entrez_api_token>> | ||
body: | ||
encoding: '' | ||
string: '' | ||
headers: | ||
Accept-Encoding: gzip, deflate | ||
Accept: application/json, text/xml, application/xml, */* | ||
User-Agent: r-curl/4.3 crul/0.9.0 rOpenSci(taxize/0.9.92.96) | ||
X-USER-AGENT: r-curl/4.3 crul/0.9.0 rOpenSci(taxize/0.9.92.96) | ||
response: | ||
status: | ||
status_code: '200' | ||
message: OK | ||
explanation: Request fulfilled, document follows | ||
headers: | ||
status: HTTP/1.1 200 OK | ||
date: Tue, 17 Mar 2020 23:06:22 GMT | ||
server: Finatra | ||
strict-transport-security: max-age=31536000; includeSubDomains; preload | ||
content-security-policy: upgrade-insecure-requests | ||
x-ratelimit-remaining: '7' | ||
ncbi-phid: 322C6F094A3EACC5000057228329FDCD.1.1.m_1 | ||
cache-control: private | ||
ncbi-sid: D70139B1B6121A77_10D1SID | ||
content-encoding: gzip | ||
x-ratelimit-limit: '10' | ||
access-control-allow-origin: '*' | ||
content-type: text/xml; charset=UTF-8 | ||
set-cookie: ncbi_sid=D70139B1B6121A77_10D1SID; domain=.nih.gov; path=/; expires=Wed, | ||
17 Mar 2021 23:06:22 GMT | ||
x-ua-compatible: IE=Edge | ||
x-xss-protection: 1; mode=block | ||
transfer-encoding: chunked | ||
body: | ||
encoding: UTF-8 | ||
file: no | ||
string: | | ||
<?xml version="1.0" encoding="UTF-8" ?> | ||
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd"> | ||
<eSearchResult><Count>5</Count><RetMax>5</RetMax><RetStart>0</RetStart><IdList> | ||
<Id>2614656</Id> | ||
<Id>2588448</Id> | ||
<Id>263770</Id> | ||
<Id>54736</Id> | ||
<Id>28901</Id> | ||
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>Salmonella[Next Level]</Term> <Field>Next Level</Field> <Count>5</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>Enterobacteriaceae[subtree]</Term> <Field>subtree</Field> <Count>20383</Count> <Explode>N</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>Salmonella[Next Level] AND Enterobacteriaceae[subtree]</QueryTranslation></eSearchResult> | ||
recorded_at: 2020-03-17 23:06:23 GMT | ||
recorded_with: vcr/0.5.0.92, webmockr/0.6.0.92 | ||
- request: | ||
method: get | ||
uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=2614656%2B2588448%2B263770%2B54736%2B28901&api_key=<<entrez_api_token>> | ||
body: | ||
encoding: '' | ||
string: '' | ||
headers: | ||
Accept-Encoding: gzip, deflate | ||
Accept: application/json, text/xml, application/xml, */* | ||
User-Agent: r-curl/4.3 crul/0.9.0 rOpenSci(taxize/0.9.92.96) | ||
X-USER-AGENT: r-curl/4.3 crul/0.9.0 rOpenSci(taxize/0.9.92.96) | ||
response: | ||
status: | ||
status_code: '200' | ||
message: OK | ||
explanation: Request fulfilled, document follows | ||
headers: | ||
status: HTTP/1.1 200 OK | ||
date: Tue, 17 Mar 2020 23:06:21 GMT | ||
server: Finatra | ||
strict-transport-security: max-age=31536000; includeSubDomains; preload | ||
content-security-policy: upgrade-insecure-requests | ||
x-ratelimit-remaining: '7' | ||
ncbi-phid: 322C6F094A3EACC50000462283D1BCF7.1.1.m_1 | ||
cache-control: private | ||
ncbi-sid: 635BE0A98532F318_CF43SID | ||
content-encoding: gzip | ||
x-ratelimit-limit: '10' | ||
access-control-allow-origin: '*' | ||
content-type: text/xml; charset=UTF-8 | ||
set-cookie: ncbi_sid=635BE0A98532F318_CF43SID; domain=.nih.gov; path=/; expires=Wed, | ||
17 Mar 2021 23:06:22 GMT | ||
x-ua-compatible: IE=Edge | ||
x-xss-protection: 1; mode=block | ||
transfer-encoding: chunked | ||
body: | ||
encoding: UTF-8 | ||
file: no | ||
string: "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<!DOCTYPE eSummaryResult | ||
PUBLIC \"-//NLM//DTD esummary v1 20041029//EN\" \"https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20041029/esummary-v1.dtd\">\n<eSummaryResult>\n<DocSum>\n\t<Id>2614656</Id>\n\t<Item | ||
Name=\"Status\" Type=\"String\">active</Item>\n\t<Item Name=\"Rank\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"Division\" Type=\"String\">enterobacteria</Item>\n\t<Item Name=\"ScientificName\" | ||
Type=\"String\">unclassified Salmonella</Item>\n\t<Item Name=\"CommonName\" | ||
Type=\"String\"></Item>\n\t<Item Name=\"TaxId\" Type=\"Integer\">2614656</Item>\n\t<Item | ||
Name=\"AkaTaxId\" Type=\"Integer\">0</Item>\n\t<Item Name=\"Genus\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"Species\" Type=\"String\"></Item>\n\t<Item Name=\"Subsp\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"ModificationDate\" Type=\"Date\">2019/09/22 00:00</Item>\n</DocSum>\n\n<DocSum>\n\t<Id>2588448</Id>\n\t<Item | ||
Name=\"Status\" Type=\"String\">active</Item>\n\t<Item Name=\"Rank\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"Division\" Type=\"String\">enterobacteria</Item>\n\t<Item Name=\"ScientificName\" | ||
Type=\"String\">Salmonella sp. SH11G0791</Item>\n\t<Item Name=\"CommonName\" | ||
Type=\"String\"></Item>\n\t<Item Name=\"TaxId\" Type=\"Integer\">2588448</Item>\n\t<Item | ||
Name=\"AkaTaxId\" Type=\"Integer\">0</Item>\n\t<Item Name=\"Genus\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"Species\" Type=\"String\"></Item>\n\t<Item Name=\"Subsp\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"ModificationDate\" Type=\"Date\">2019/06/13 00:00</Item>\n</DocSum>\n\n<DocSum>\n\t<Id>263770</Id>\n\t<Item | ||
Name=\"Status\" Type=\"String\">active</Item>\n\t<Item Name=\"Rank\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"Division\" Type=\"String\">enterobacteria</Item>\n\t<Item Name=\"ScientificName\" | ||
Type=\"String\">environmental samples</Item>\n\t<Item Name=\"CommonName\" | ||
Type=\"String\"></Item>\n\t<Item Name=\"TaxId\" Type=\"Integer\">263770</Item>\n\t<Item | ||
Name=\"AkaTaxId\" Type=\"Integer\">0</Item>\n\t<Item Name=\"Genus\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"Species\" Type=\"String\"></Item>\n\t<Item Name=\"Subsp\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"ModificationDate\" Type=\"Date\">2018/10/18 00:00</Item>\n</DocSum>\n\n<DocSum>\n\t<Id>54736</Id>\n\t<Item | ||
Name=\"Status\" Type=\"String\">active</Item>\n\t<Item Name=\"Rank\" Type=\"String\">species</Item>\n\t<Item | ||
Name=\"Division\" Type=\"String\">enterobacteria</Item>\n\t<Item Name=\"ScientificName\" | ||
Type=\"String\">Salmonella bongori</Item>\n\t<Item Name=\"CommonName\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"TaxId\" Type=\"Integer\">54736</Item>\n\t<Item Name=\"AkaTaxId\" Type=\"Integer\">0</Item>\n\t<Item | ||
Name=\"Genus\" Type=\"String\">Salmonella</Item>\n\t<Item Name=\"Species\" | ||
Type=\"String\">bongori</Item>\n\t<Item Name=\"Subsp\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"ModificationDate\" Type=\"Date\">2018/10/31 00:00</Item>\n</DocSum>\n\n<DocSum>\n\t<Id>28901</Id>\n\t<Item | ||
Name=\"Status\" Type=\"String\">active</Item>\n\t<Item Name=\"Rank\" Type=\"String\">species</Item>\n\t<Item | ||
Name=\"Division\" Type=\"String\">enterobacteria</Item>\n\t<Item Name=\"ScientificName\" | ||
Type=\"String\">Salmonella enterica</Item>\n\t<Item Name=\"CommonName\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"TaxId\" Type=\"Integer\">28901</Item>\n\t<Item Name=\"AkaTaxId\" Type=\"Integer\">0</Item>\n\t<Item | ||
Name=\"Genus\" Type=\"String\">Salmonella</Item>\n\t<Item Name=\"Species\" | ||
Type=\"String\">enterica</Item>\n\t<Item Name=\"Subsp\" Type=\"String\"></Item>\n\t<Item | ||
Name=\"ModificationDate\" Type=\"Date\">2019/10/07 00:00</Item>\n</DocSum>\n\n</eSummaryResult>\n" | ||
recorded_at: 2020-03-17 23:06:23 GMT | ||
recorded_with: vcr/0.5.0.92, webmockr/0.6.0.92 |
Oops, something went wrong.