Skip to content

Commit 84961cf

Browse files
authored
Merge pull request #133 from Laerte/master
fix: Header name Zyte-Error → Zyte-Error-Type
1 parent 8b16f37 commit 84961cf

File tree

5 files changed

+33
-20
lines changed

5 files changed

+33
-20
lines changed

.github/workflows/main.yml

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ jobs:
5555
cache: pip
5656
- name: Install dependencies
5757
run: |
58+
sudo apt update -y && sudo apt install -y gcc-9
5859
python -m pip install --upgrade pip
5960
pip install tox codecov
6061
- name: Run tests

docs/headers.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Translation is supported for the following headers:
2525
Zyte API Zyte Smart Proxy Manager
2626
========================= ===========================
2727
``Zyte-Device`` ``X-Crawlera-Profile``
28-
``Zyte-Error`` ``X-Crawlera-Error``
28+
``Zyte-Error-Type`` ``X-Crawlera-Error``
2929
``Zyte-Geolocation`` ``X-Crawlera-Region``
3030
``Zyte-JobId`` ``X-Crawlera-JobId``
3131
``Zyte-Override-Headers`` ``X-Crawlera-Profile-Pass``

scrapy_zyte_smartproxy/middleware.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def _is_banned(self, response):
307307
return (
308308
response.status == self.ban_code
309309
and response.headers.get("X-Crawlera-Error") == b"banned"
310-
) or (response.status in {520, 521} and response.headers.get("Zyte-Error"))
310+
) or (response.status in {520, 521} and response.headers.get("Zyte-Error-Type"))
311311

312312
def _is_auth_error(self, response):
313313
return (
@@ -316,21 +316,21 @@ def _is_auth_error(self, response):
316316
)
317317

318318
def _throttle_error(self, response):
319-
error = response.headers.get("Zyte-Error") or response.headers.get(
319+
error = response.headers.get("Zyte-Error-Type") or response.headers.get(
320320
"X-Crawlera-Error"
321321
)
322322
if response.status in {429, 503} and error and error != b"banned":
323323
return error.decode("utf-8")
324324
return None
325325

326326
def _process_error(self, response):
327-
if "Zyte-Error" in response.headers:
328-
value = response.headers.get("Zyte-Error")
327+
if "Zyte-Error-Type" in response.headers:
328+
value = response.headers.get("Zyte-Error-Type")
329329
response.headers["X-Crawlera-Error"] = value
330330
return value
331331
if "X-Crawlera-Error" in response.headers:
332332
value = response.headers.get("X-Crawlera-Error")
333-
response.headers["Zyte-Error"] = value
333+
response.headers["Zyte-Error-Type"] = value
334334
return value
335335
return None
336336

@@ -480,10 +480,10 @@ def _get_url_domain(self, url):
480480
return parsed.netloc
481481

482482
def _is_zyte_smartproxy_or_zapi_response(self, response):
483+
"""Check whether the response is a Smart Proxy Manager or Zyte API proxy mode response."""
483484
return (
484485
"X-Crawlera-Version" in response.headers
485486
or "Zyte-Request-Id" in response.headers
486-
or "zyte-error-type" in response.headers
487487
)
488488

489489
def _get_slot_key(self, request):

tests/test_all.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
("X-Crawlera-Version", ""),
3030
("X-Crawlera-Version", "1.36.3-cd5e44"),
3131
("Zyte-Request-Id", "123456789"),
32-
("zyte-error-type", "foo"),
3332
)
3433

3534

@@ -160,7 +159,7 @@ def _assert_enabled(
160159
)
161160
assert mw.process_response(req, res, spider) is res
162161
assert res.headers["X-Crawlera-Error"] == b"banned"
163-
assert res.headers["Zyte-Error"] == b"banned"
162+
assert res.headers["Zyte-Error-Type"] == b"banned"
164163

165164
# max bans reached and close_spider called
166165
self.assertEqual(crawler.engine.fake_spider_closed_result, (spider, "banned"))
@@ -482,7 +481,7 @@ def _test_stats(self, settings, prefix):
482481
)
483482

484483
res = self._mock_zyte_smartproxy_response(
485-
req.url, status=mw.ban_code, headers={"Zyte-Error": "somethingbad"}
484+
req.url, status=mw.ban_code, headers={"Zyte-Error-Type": "somethingbad"}
486485
)
487486
assert mw.process_response(req, res, spider) is res
488487
self.assertEqual(crawler.stats.get_value("{}/response".format(prefix)), 2)
@@ -497,7 +496,7 @@ def _test_stats(self, settings, prefix):
497496
crawler.stats.get_value("{}/response/error/somethingbad".format(prefix)), 1
498497
)
499498
self.assertEqual(res.headers["X-Crawlera-Error"], b"somethingbad")
500-
self.assertEqual(res.headers["Zyte-Error"], b"somethingbad")
499+
self.assertEqual(res.headers["Zyte-Error-Type"], b"somethingbad")
501500

502501
res = self._mock_zyte_smartproxy_response(
503502
req.url,
@@ -516,7 +515,7 @@ def _test_stats(self, settings, prefix):
516515
crawler.stats.get_value("{}/response/banned".format(prefix)), 1
517516
)
518517
self.assertEqual(res.headers["X-Crawlera-Error"], b"banned")
519-
self.assertEqual(res.headers["Zyte-Error"], b"banned")
518+
self.assertEqual(res.headers["Zyte-Error-Type"], b"banned")
520519

521520
res = self._mock_zyte_smartproxy_response(
522521
req.url,
@@ -672,7 +671,10 @@ def test_is_banned(self):
672671
res = Response(
673672
req.url,
674673
status=503,
675-
headers={"Zyte-Error": "/limits/over-global-limit"},
674+
headers={
675+
"Zyte-Request-Id": "123456789",
676+
"Zyte-Error-Type": "/limits/over-global-limit",
677+
},
676678
)
677679
res = mw.process_response(req, res, self.spider)
678680
self.assertFalse(mw._is_banned(res))
@@ -681,16 +683,26 @@ def test_is_banned(self):
681683
res = mw.process_response(req, res, self.spider)
682684
self.assertTrue(mw._is_banned(res))
683685
res = Response(
684-
req.url, status=520, headers={"Zyte-Error": "/download/temporary-error"}
686+
req.url,
687+
status=520,
688+
headers={
689+
"Zyte-Request-Id": "123456789",
690+
"Zyte-Error-Type": "/download/temporary-error",
691+
},
685692
)
686693
res = mw.process_response(req, res, self.spider)
694+
assert mw.crawler.stats.get_value("zyte_smartproxy/response/banned") == 1
687695
self.assertTrue(mw._is_banned(res))
688696
res = Response(
689697
req.url,
690698
status=521,
691-
headers={"Zyte-Error": "/download/internal-error"},
699+
headers={
700+
"Zyte-Request-Id": "123456789",
701+
"Zyte-Error-Type": "/download/internal-error",
702+
},
692703
)
693704
res = mw.process_response(req, res, self.spider)
705+
assert mw.crawler.stats.get_value("zyte_smartproxy/response/banned") == 2
694706
self.assertTrue(mw._is_banned(res))
695707

696708
@patch("random.uniform")
@@ -733,23 +745,23 @@ def test_noslaves_delays(self, random_uniform_patch):
733745
over_use_limit_response = self._mock_zyte_smartproxy_response(
734746
ban_url,
735747
status=429,
736-
headers={"Zyte-Error": "/limits/over-user-limit"},
748+
headers={"Zyte-Error-Type": "/limits/over-user-limit"},
737749
)
738750
mw.process_response(noslaves_req, over_use_limit_response, self.spider)
739751
self.assertEqual(slot.delay, backoff_step * 2**1)
740752

741753
over_domain_limit_response = self._mock_zyte_smartproxy_response(
742754
ban_url,
743755
status=429,
744-
headers={"Zyte-Error": "/limits/over-domain-limit"},
756+
headers={"Zyte-Error-Type": "/limits/over-domain-limit"},
745757
)
746758
mw.process_response(noslaves_req, over_domain_limit_response, self.spider)
747759
self.assertEqual(slot.delay, backoff_step * 2**2)
748760

749761
over_global_limit_response = self._mock_zyte_smartproxy_response(
750762
ban_url,
751763
status=503,
752-
headers={"Zyte-Error": "/limits/over-global-limit"},
764+
headers={"Zyte-Error-Type": "/limits/over-global-limit"},
753765
)
754766
mw.process_response(noslaves_req, over_global_limit_response, self.spider)
755767
self.assertEqual(slot.delay, max_delay)

tox.ini

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ commands =
5656
[testenv:twinecheck]
5757
basepython = python3
5858
deps =
59-
twine==5.1.1
60-
build==1.2.2
59+
twine==6.1.0
60+
build==1.2.2.post1
6161
commands =
6262
python -m build --sdist
6363
twine check dist/*

0 commit comments

Comments
 (0)