Skip to content

Commit

Permalink
Add a method to check if the query string is too long and will fail
Browse files Browse the repository at this point in the history
  • Loading branch information
valgur committed Jun 3, 2017
1 parent b104084 commit 6518414
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 35 deletions.
41 changes: 36 additions & 5 deletions sentinelsat/sentinel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from tqdm import tqdm

from six import string_types
from six.moves.urllib.parse import urljoin
from six.moves.urllib.parse import urljoin, quote_plus

from . import __version__ as sentinelsat_version

Expand Down Expand Up @@ -139,10 +139,10 @@ def query_raw(self, query):
except SentinelAPIError as e:
# Queries with length greater than about 2700-3600 characters (depending on content) may
# produce "HTTP status 500 Internal Server Error"
if e.response.status_code == 500 and len(query) > 2700:
self.logger.warning(
"The query likely failed due to its excessive length ({} bytes, the limit is "
"~3000)".format(len(query.encode())))
factor = self.check_query_length(query)
if e.response.status_code == 500 and not e.msg and factor > 0.95 :
e.msg = ("The query likely failed due to its length "
"({:.1%} of the limit)".format(factor))
e.__cause__ = None
raise e
return _parse_opensearch_response(response)
Expand Down Expand Up @@ -402,6 +402,37 @@ def get_products_size(products):
size_total += size_value
return round(size_total, 2)

@staticmethod
def check_query_length(query):
    """Estimate how close a query is to the OpenSearch API's length limit.

    The query size limit is dependent on the length of the server's internal
    query, which looks like

        http://localhost:30333//solr/dhus/select?q=...
        &wt=xslt&tr=opensearch_atom.xsl&dhusLongName=Sentinels+Scientific+Data+Hub
        &dhusServer=https%3A%2F%2Fscihub.copernicus.eu%2Fapihub%2F&originalQuery=...
        &rows=100&start=0&sort=ingestiondate+desc

    This function estimates the size of the "q" and "originalQuery"
    parameters to determine whether the query will fail. Their combined
    length can be at most about 7786 bytes.

    Parameters
    ----------
    query : str
        The query string

    Returns
    -------
    float
        Ratio of the query length to the maximum length. Values of 1.0 or
        above indicate that the query is expected to be rejected.
    """
    # Float literal on purpose: with a plain int divisor Python 2 would
    # floor the result to a whole number, hiding how close the query is
    # to the limit.
    max_length = 7786.0

    # Estimate of the "q" parameter: only spaces are percent-encoded.
    q = query.replace(" ", "%20")
    # Estimate of the "originalQuery" parameter: fully URL-encoded.
    original_query = quote_plus(query)

    total_length = len(q) + len(original_query)
    return total_length / max_length


class SentinelAPIError(Exception):
"""Invalid responses from DataHub.
Expand Down
23 changes: 11 additions & 12 deletions tests/test_mod.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,26 +202,25 @@ def test_large_query():
@pytest.mark.scihub
def test_too_long_query():
api = SentinelAPI(**_api_kwargs)
logger = logging.getLogger('sentinelsat')
stream = StringIO()
h = logging.StreamHandler(stream)
logger.addHandler(h)

# Test whether our limit calculation is reasonably correct and
# that a relevant error message is provided

def create_query(n):
return " AND ".join([api.format_query(None, "NOW", "NOW")] + ["orbitdirection:descending"] * n)
return api.format_query(None, "NOW", "NOW") + " AND orbitdirection:descending" * n

# Expect no error
api.query_raw(create_query(100))
stream.seek(0)
assert "excessive length" not in stream.read()
q = create_query(116)
assert api.check_query_length(q) < 1.0
api.query_raw(q)

# Expect HTTP status 500 Internal Server Error
# sentinelsat should print a warning in this case
q = create_query(117)
assert api.check_query_length(q) >= 1.0
with pytest.raises(SentinelAPIError) as excinfo:
api.query_raw(create_query(120))
api.query_raw(q)
assert excinfo.value.response.status_code == 500
stream.seek(0)
assert "excessive length" in stream.read()
assert "failed due to its length" in excinfo.value.msg


@pytest.mark.fast
Expand Down
Loading

0 comments on commit 6518414

Please sign in to comment.