Skip to content

Commit 84b418a

Browse files
authored
MRG: Merge pull request #669 from octue/speed-up-event-replaying
Speed up event replaying
2 parents b3cfa00 + a0b68d4 commit 84b418a

19 files changed

+413
-217
lines changed

docs/source/inter_service_compatibility.rst

+83-81
Large diffs are not rendered by default.

octue/cloud/emulators/_pub_sub.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,10 @@ def __init__(self, *args, **kwargs):
6363
super().__init__(*args, **kwargs)
6464
self._subscriber = MockSubscriber()
6565

66+
@property
67+
def subscriber(self):
68+
return self._subscriber
69+
6670
def create(self, allow_existing=False):
6771
"""Register the subscription in the global subscriptions dictionary.
6872
@@ -78,11 +82,14 @@ def create(self, allow_existing=False):
7882
self._created = True
7983

8084
def delete(self):
81-
"""Do nothing.
85+
"""Delete the subscription from the global subscriptions dictionary.
8286
8387
:return None:
8488
"""
85-
pass
89+
try:
90+
SUBSCRIPTIONS.remove(self.name)
91+
except KeyError:
92+
pass
8693

8794
def exists(self, timeout=5):
8895
"""Check if the subscription exists in the global subscriptions dictionary.
@@ -310,7 +317,6 @@ def __init__(self, backend, service_id=None, run_function=None, children=None, *
310317
super().__init__(backend, service_id, run_function, *args, **kwargs)
311318
self.children = children or {}
312319
self._publisher = MockPublisher()
313-
self.subscriber = MockSubscriber()
314320

315321
@property
316322
def publisher(self):

octue/cloud/events/handler.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,8 @@ class AbstractEventHandler:
3939
:param dict|None event_handlers: a mapping of event type names to callables that handle each type of event. The handlers must not mutate the events.
4040
:param dict schema: the JSON schema to validate events against
4141
:param bool include_service_metadata_in_logs: if `True`, include the SRUIDs and question UUIDs of the service revisions involved in the question to the start of the log message
42-
:param bool only_handle_result: if `True`, skip handling non-result events and only handle the "result" event when received
42+
:param bool only_handle_result: if `True`, skip handling non-result events and only handle the "result" event when received (turning this on speeds up event handling)
43+
:param bool validate_events: if `True`, validate events before attempting to handle them (turning this off speeds up event handling)
4344
:return None:
4445
"""
4546

@@ -51,12 +52,14 @@ def __init__(
5152
schema=SERVICE_COMMUNICATION_SCHEMA,
5253
include_service_metadata_in_logs=True,
5354
only_handle_result=False,
55+
validate_events=True,
5456
):
5557
self.handle_monitor_message = handle_monitor_message
5658
self.record_events = record_events
5759
self.schema = schema
5860
self.include_service_metadata_in_logs = include_service_metadata_in_logs
5961
self.only_handle_result = only_handle_result
62+
self.validate_events = validate_events
6063

6164
self.handled_events = []
6265
self._start_time = None
@@ -113,7 +116,7 @@ def _extract_and_validate_event(self, container):
113116
recipient = attributes.get("recipient")
114117
child_sdk_version = attributes.get("sender_sdk_version")
115118

116-
if not is_event_valid(
119+
if self.validate_events and not is_event_valid(
117120
event=event,
118121
attributes=attributes,
119122
recipient=recipient,
@@ -125,8 +128,8 @@ def _extract_and_validate_event(self, container):
125128

126129
logger.debug(
127130
"%r: Received an event related to question %r.",
128-
attributes["recipient"],
129-
attributes["question_uuid"],
131+
attributes.get("recipient"),
132+
attributes.get("question_uuid"),
130133
)
131134

132135
return (event, attributes)

octue/cloud/events/replayer.py

+8-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,8 @@ class EventReplayer(AbstractEventHandler):
1515
:param dict|None event_handlers: a mapping of event type names to callables that handle each type of event. The handlers must not mutate the events.
1616
:param dict|str schema: the JSON schema to validate events against
1717
:param bool include_service_metadata_in_logs: if `True`, include the SRUIDs and question UUIDs of the service revisions involved in the question to the start of the log message
18-
:param bool only_handle_result: if `True`, skip non-result events and only handle the "result" event if present
18+
:param bool only_handle_result: if `True`, skip non-result events and only handle the "result" event if present (turning this on speeds up event handling)
19+
:param bool validate_events: if `True`, validate events before attempting to handle them (turning this off speeds up event handling)
1920
:return None:
2021
"""
2122

@@ -27,6 +28,7 @@ def __init__(
2728
schema=SERVICE_COMMUNICATION_SCHEMA,
2829
include_service_metadata_in_logs=True,
2930
only_handle_result=False,
31+
validate_events=True,
3032
):
3133
event_handlers = event_handlers or {
3234
"question": self._handle_question,
@@ -45,6 +47,7 @@ def __init__(
4547
schema=schema,
4648
include_service_metadata_in_logs=include_service_metadata_in_logs,
4749
only_handle_result=only_handle_result,
50+
validate_events=validate_events,
4851
)
4952

5053
def handle_events(self, events):
@@ -56,6 +59,10 @@ def handle_events(self, events):
5659
super().handle_events()
5760

5861
for event in events:
62+
# Skip validation and handling of other event kinds if only the result event is wanted.
63+
if self.only_handle_result and event.get("event", {}).get("kind") != "result":
64+
continue
65+
5966
event, attributes = self._extract_and_validate_event(event)
6067

6168
# Skip the event if it fails validation.

octue/cloud/pub_sub/events.py

+14-4
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import logging
44
import time
55
from datetime import datetime, timedelta
6+
from functools import cached_property
67

78
from google.api_core import retry
89
from google.cloud.pubsub_v1 import SubscriberClient
@@ -91,11 +92,20 @@ def __init__(
9192
include_service_metadata_in_logs=include_service_metadata_in_logs,
9293
)
9394

94-
self._subscriber = SubscriberClient()
9595
self._heartbeat_checker = None
9696
self._last_heartbeat = None
9797
self._alive = True
9898

99+
@cached_property
100+
def subscriber(self):
101+
"""Get or instantiate the subscriber client. The client isn't instantiated until this property is called for the
102+
first time. This allows checking for the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to be put off
103+
until it's needed.
104+
105+
:return google.cloud.pubsub_v1.SubscriberClient:
106+
"""
107+
return SubscriberClient()
108+
99109
@property
100110
def total_run_time(self):
101111
"""The amount of time elapsed since `self.handle_events` was called. If it hasn't been called yet, this is
@@ -156,7 +166,7 @@ def handle_events(self, timeout=60, maximum_heartbeat_interval=300):
156166

157167
finally:
158168
self._heartbeat_checker.cancel()
159-
self._subscriber.close()
169+
self.subscriber.close()
160170

161171
if self.handled_events:
162172
last_event = self.handled_events[-1]
@@ -221,7 +231,7 @@ def _pull_available_events(self, timeout):
221231
while self._alive:
222232
logger.debug("Pulling events from Google Pub/Sub: attempt %d.", attempt)
223233

224-
pull_response = self._subscriber.pull(
234+
pull_response = self.subscriber.pull(
225235
request={"subscription": self.subscription.path, "max_messages": MAX_SIMULTANEOUS_MESSAGES_PULL},
226236
retry=retry.Retry(),
227237
)
@@ -240,7 +250,7 @@ def _pull_available_events(self, timeout):
240250
if not pull_response.received_messages:
241251
return []
242252

243-
self._subscriber.acknowledge(
253+
self.subscriber.acknowledge(
244254
request={
245255
"subscription": self.subscription.path,
246256
"ack_ids": [message.ack_id for message in pull_response.received_messages],

octue/cloud/pub_sub/service.py

+11-19
Original file line number | Diff line number | Diff line change
@@ -84,8 +84,6 @@ def __init__(self, backend, service_id=None, run_function=None, service_registri
8484

8585
self._pub_sub_id = convert_service_id_to_pub_sub_form(self.id)
8686
self._local_sdk_version = importlib.metadata.version("octue")
87-
self._publisher = None
88-
self._services_topic = None
8987
self._event_handler = None
9088

9189
def __repr__(self):
@@ -95,23 +93,20 @@ def __repr__(self):
9593
"""
9694
return f"<{type(self).__name__}({self.id!r})>"
9795

98-
@property
96+
@functools.cached_property
9997
def publisher(self):
10098
"""Get or instantiate the publisher client for the service. No publisher is instantiated until this property is
10199
called for the first time. This allows checking for the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to
102100
be put off until it's needed.
103101
104102
:return google.cloud.pubsub_v1.PublisherClient:
105103
"""
106-
if not self._publisher:
107-
self._publisher = pubsub_v1.PublisherClient(
108-
batch_settings=BATCH_SETTINGS,
109-
publisher_options=pubsub_v1.types.PublisherOptions(enable_message_ordering=True),
110-
)
111-
112-
return self._publisher
104+
return pubsub_v1.PublisherClient(
105+
batch_settings=BATCH_SETTINGS,
106+
publisher_options=pubsub_v1.types.PublisherOptions(enable_message_ordering=True),
107+
)
113108

114-
@property
109+
@functools.cached_property
115110
def services_topic(self):
116111
"""Get the Octue services topic that all events in the project are published to. No topic is instantiated until
117112
this property is called for the first time. This allows checking for the `GOOGLE_APPLICATION_CREDENTIALS`
@@ -120,15 +115,12 @@ def services_topic(self):
120115
:raise octue.exceptions.ServiceNotFound: if the topic doesn't exist in the project
121116
:return octue.cloud.pub_sub.topic.Topic: the Octue services topic for the project
122117
"""
123-
if not self._services_topic:
124-
topic = Topic(name=OCTUE_SERVICES_PREFIX, project_name=self.backend.project_name)
125-
126-
if not topic.exists():
127-
raise octue.exceptions.ServiceNotFound(f"{topic!r} cannot be found.")
118+
topic = Topic(name=OCTUE_SERVICES_PREFIX, project_name=self.backend.project_name)
128119

129-
self._services_topic = topic
120+
if not topic.exists():
121+
raise octue.exceptions.ServiceNotFound(f"{topic!r} cannot be found.")
130122

131-
return self._services_topic
123+
return topic
132124

133125
@property
134126
def received_events(self):
@@ -533,7 +525,7 @@ def _emit_event(
533525
attributes.update(
534526
{
535527
"uuid": str(uuid.uuid4()),
536-
"datetime": datetime.datetime.utcnow().isoformat(),
528+
"datetime": datetime.datetime.now(tz=datetime.timezone.utc).isoformat(),
537529
"question_uuid": question_uuid,
538530
"parent_question_uuid": parent_question_uuid,
539531
"originator_question_uuid": originator_question_uuid,

octue/cloud/pub_sub/subscription.py

+16-6
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from functools import cached_property
23

34
import google.api_core.exceptions
45
from google.cloud.pubsub_v1 import SubscriberClient
@@ -72,9 +73,18 @@ def __init__(
7273

7374
self.push_endpoint = push_endpoint
7475
self.enable_message_ordering = enable_message_ordering
75-
self._subscriber = SubscriberClient()
7676
self._created = False
7777

78+
@cached_property
79+
def subscriber(self):
80+
"""Get or instantiate the subscriber client. The client isn't instantiated until this property is called for the
81+
first time. This allows checking for the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to be put off
82+
until it's needed.
83+
84+
:return google.cloud.pubsub_v1.SubscriberClient:
85+
"""
86+
return SubscriberClient()
87+
7888
@property
7989
def creation_triggered_locally(self):
8090
"""Was the subscription successfully created by calling `self.create` locally? This is `False` if its creation
@@ -116,13 +126,13 @@ def create(self, allow_existing=False):
116126
subscription = self._create_proto_message_subscription()
117127

118128
if not allow_existing:
119-
subscription = self._subscriber.create_subscription(request=subscription)
129+
subscription = self.subscriber.create_subscription(request=subscription)
120130
self._created = True
121131
self._log_creation()
122132
return subscription
123133

124134
try:
125-
subscription = self._subscriber.create_subscription(request=subscription)
135+
subscription = self.subscriber.create_subscription(request=subscription)
126136
self._created = True
127137
except google.api_core.exceptions.AlreadyExists:
128138
pass
@@ -135,7 +145,7 @@ def update(self):
135145
136146
:return None:
137147
"""
138-
self._subscriber.update_subscription(
148+
self.subscriber.update_subscription(
139149
request=UpdateSubscriptionRequest(
140150
mapping=None,
141151
subscription=self._create_proto_message_subscription(), # noqa
@@ -156,7 +166,7 @@ def delete(self):
156166
157167
:return None:
158168
"""
159-
self._subscriber.delete_subscription(subscription=self.path)
169+
self.subscriber.delete_subscription(subscription=self.path)
160170
logger.info("Subscription %r deleted.", self.path)
161171

162172
def exists(self, timeout=5):
@@ -166,7 +176,7 @@ def exists(self, timeout=5):
166176
:return bool:
167177
"""
168178
try:
169-
self._subscriber.get_subscription(subscription=self.path, timeout=timeout)
179+
self.subscriber.get_subscription(subscription=self.path, timeout=timeout)
170180
return True
171181
except google.api_core.exceptions.NotFound:
172182
return False

octue/cloud/pub_sub/topic.py

+16-6
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import logging
22
import time
33
from datetime import datetime
4+
from functools import cached_property
45

56
import google.api_core.exceptions
67
from google.cloud.pubsub_v1 import PublisherClient
@@ -24,9 +25,18 @@ def __init__(self, name, project_name):
2425
self.name = name
2526
self.project_name = project_name
2627
self.path = self.generate_topic_path(self.project_name, self.name)
27-
self._publisher = PublisherClient()
2828
self._created = False
2929

30+
@cached_property
31+
def publisher(self):
32+
"""Get or instantiate the publisher client. The client isn't instantiated until this property is called for the
33+
first time. This allows checking for the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to be put off
34+
until it's needed.
35+
36+
:return google.cloud.pubsub_v1.PublisherClient:
37+
"""
38+
return PublisherClient()
39+
3040
@property
3141
def creation_triggered_locally(self):
3242
"""Was the topic successfully created by calling `self.create` locally? This is `False` if its creation was
@@ -52,15 +62,15 @@ def create(self, allow_existing=False):
5262
posix_timestamp_with_no_decimals = str(datetime.now().timestamp()).split(".")[0]
5363

5464
if not allow_existing:
55-
self._publisher.create_topic(
65+
self.publisher.create_topic(
5666
request=Topic_(name=self.path, labels={"created": posix_timestamp_with_no_decimals})
5767
)
5868
self._created = True
5969
self._log_creation()
6070
return
6171

6272
try:
63-
self._publisher.create_topic(
73+
self.publisher.create_topic(
6474
request=Topic_(name=self.path, labels={"created": posix_timestamp_with_no_decimals})
6575
)
6676
self._created = True
@@ -74,14 +84,14 @@ def get_subscriptions(self):
7484
7585
:return list(str):
7686
"""
77-
return list(self._publisher.list_topic_subscriptions(topic=self.path))
87+
return list(self.publisher.list_topic_subscriptions(topic=self.path))
7888

7989
def delete(self):
8090
"""Delete the topic from Google Pub/Sub.
8191
8292
:return None:
8393
"""
84-
self._publisher.delete_topic(topic=self.path)
94+
self.publisher.delete_topic(topic=self.path)
8595
logger.info("Topic %r deleted.", self.path)
8696

8797
def exists(self, timeout=10):
@@ -94,7 +104,7 @@ def exists(self, timeout=10):
94104

95105
while time.time() - start_time <= timeout:
96106
try:
97-
self._publisher.get_topic(topic=self.path)
107+
self.publisher.get_topic(topic=self.path)
98108
return True
99109
except google.api_core.exceptions.NotFound:
100110
time.sleep(1)

0 commit comments

Comments
 (0)