@@ -61,7 +61,9 @@ class LogStash::Inputs::AzureBlobStorage < LogStash::Inputs::Base
   # Z00000000000000000000000000000000 2 ]}
   config :interval, :validate => :number, :default => 60

+  # add the filename into the events
   config :addfilename, :validate => :boolean, :default => false, :required => false
+
   # debug_until will, for a maximum number of processed messages, show 3 types of log printouts including processed filenames. This is a lightweight alternative to switching the loglevel from info to debug or even trace
   config :debug_until, :validate => :number, :default => 0, :required => false

@@ -71,6 +73,9 @@ class LogStash::Inputs::AzureBlobStorage < LogStash::Inputs::Base
   # WAD IIS Grok Pattern
   #config :grokpattern, :validate => :string, :required => false, :default => '%{TIMESTAMP_ISO8601:log_timestamp} %{NOTSPACE:instanceId} %{NOTSPACE:instanceId2} %{IPORHOST:ServerIP} %{WORD:httpMethod} %{URIPATH:requestUri} %{NOTSPACE:requestQuery} %{NUMBER:port} %{NOTSPACE:username} %{IPORHOST:clientIP} %{NOTSPACE:httpVersion} %{NOTSPACE:userAgent} %{NOTSPACE:cookie} %{NOTSPACE:referer} %{NOTSPACE:host} %{NUMBER:httpStatus} %{NUMBER:subresponse} %{NUMBER:win32response} %{NUMBER:sentBytes:int} %{NUMBER:receivedBytes:int} %{NUMBER:timeTaken:int}'

+  # skip learning if you use json and don't want to learn the head and tail, but instead use either the defaults or configure them.
+  config :skip_learning, :validate => :boolean, :default => false, :required => false
+
   # The string that starts the JSON. Only needed when the codec is JSON. When partial files are read, the result will not be valid JSON unless the start and end are put back. The file_head and file_tail are learned at startup by reading the first file in the blob_list and taking its first and last block; this works for blobs that are appended to, like nsgflowlogs. The configuration can be set to override the learning. In case learning fails and the option is not set, the default is to use 'records' as set by nsgflowlogs.
   config :file_head, :validate => :string, :required => false, :default => '{"records":['
   # The string that ends the JSON
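The file_head/file_tail behaviour described above is easiest to see with a small round trip. The snippet below is an illustrative sketch only, not plugin code; the sample chunk and values are made up.

```ruby
# Illustrative sketch: a chunk read from the middle of an nsgflowlog blob is two
# JSON objects separated by a comma, which is not valid JSON by itself. Wrapping
# it in the learned or configured head '{"records":[' and tail ']}' makes it
# parseable again. The timestamps are invented sample data.
require 'json'

head  = '{"records":['
tail  = ']}'
chunk = '{"time":"2021-01-01T00:00:00Z"},{"time":"2021-01-01T00:01:00Z"}'

begin
  JSON.parse(chunk)
rescue JSON::ParserError
  puts "chunk alone does not parse"
end

puts JSON.parse(head + chunk + tail)['records'].size   # => 2
```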
@@ -113,34 +118,7 @@ def run(queue)
     @processed = 0
     @regsaved = @processed

-    # Try in this order to access the storageaccount
-    # 1. storageaccount / sas_token
-    # 2. connection_string
-    # 3. storageaccount / access_key
-
-    unless connection_string.nil?
-      conn = connection_string.value
-    end
-    unless sas_token.nil?
-      unless sas_token.value.start_with?('?')
-        conn = "BlobEndpoint=https://#{storageaccount}.#{dns_suffix};SharedAccessSignature=#{sas_token.value}"
-      else
-        conn = sas_token.value
-      end
-    end
-    unless conn.nil?
-      @blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
-    else
-      # unless use_development_storage?
-      @blob_client = Azure::Storage::Blob::BlobService.create(
-        storage_account_name: storageaccount,
-        storage_dns_suffix: dns_suffix,
-        storage_access_key: access_key.value,
-      )
-      # else
-      #   @logger.info("not yet implemented")
-      # end
-    end
+    connect

     @registry = Hash.new
     if registry_create_policy == "resume"
@@ -175,7 +153,7 @@ def run(queue)
     if registry_create_policy == "start_fresh"
       @registry = list_blobs(true)
       save_registry(@registry)
-      @logger.info("starting fresh, writing a clean the registry to contain #{@registry.size} blobs/files")
+      @logger.info("starting fresh, writing a clean registry to contain #{@registry.size} blobs/files")
     end

     @is_json = false
@@ -188,12 +166,14 @@ def run(queue)
     @tail = ''
     # if codec=json, sniff one file's blocks A and Z to learn file_head and file_tail
     if @is_json
-      learn_encapsulation
       if file_head
-        @head = file_head
+        @head = file_head
       end
       if file_tail
-        @tail = file_tail
+        @tail = file_tail
+      end
+      if file_head and file_tail and !skip_learning
+        learn_encapsulation
       end
       @logger.info("head will be: #{@head} and tail is set to #{@tail}")
     end
@@ -234,6 +214,8 @@ def run(queue)
       # size nilClass when the list doesn't grow?!
       # Worklist is the subset of files where the already read offset is smaller than the file size
       worklist.clear
+      chunk = nil
+
       worklist = newreg.select { |name, file| file[:offset] < file[:length] }
       if (worklist.size > 4) then @logger.info("worklist contains #{worklist.size} blobs") end

@@ -246,19 +228,26 @@ def run(queue)
         size = 0
         if file[:offset] == 0
           # This is where Sera4000 issue starts
-          # For an append blob, reading full and crashing, retry, last_modified? ... lenght? ... committed? ...
-          # length and skip reg value
-          begin
-            chunk = full_read(name)
-            size = chunk.size
-          rescue Exception => e
-            @logger.error("Failed to read #{name} because of: #{e.message} .. will continue and pretend this never happened")
+          # For an append blob, reading full and crashing, retry, last_modified? ... lenght? ... committed? ...
+          # length and skip reg value
+          if (file[:length] > 0)
+            begin
+              chunk = full_read(name)
+              size = chunk.size
+            rescue Exception => e
+              @logger.error("Failed to read #{name} because of: #{e.message} .. will continue and pretend this never happened")
+            end
+          else
+            @logger.info("found a zero size file #{name}")
+            chunk = nil
           end
         else
           chunk = partial_read_json(name, file[:offset], file[:length])
           @logger.debug("partial file #{name} from #{file[:offset]} to #{file[:length]}")
         end
         if logtype == "nsgflowlog" && @is_json
+          # skip empty chunks
+          unless chunk.nil?
           res = resource(name)
           begin
             fingjson = JSON.parse(chunk)
@@ -267,6 +256,7 @@ def run(queue)
           rescue JSON::ParserError
             @logger.error("parse error on #{res[:nsg]} [#{res[:date]}] offset: #{file[:offset]} length: #{file[:length]}")
           end
+          end
           # TODO: Convert this to line based grokking.
           # TODO: ECS Compliance?
         elsif logtype == "wadiis" && !@is_json
@@ -284,6 +274,7 @@ def run(queue)
           end
         rescue Exception => e
           @logger.error("codec exception: #{e.message} .. will continue and pretend this never happened")
+          @registry.store(name, { :offset => file[:length], :length => file[:length] })
           @logger.debug("#{chunk}")
         end
         @processed += counter
@@ -323,8 +314,54 @@ def close


   private
+  def connect
+    # Try in this order to access the storageaccount
+    # 1. storageaccount / sas_token
+    # 2. connection_string
+    # 3. storageaccount / access_key
+
+    unless connection_string.nil?
+      conn = connection_string.value
+    end
+    unless sas_token.nil?
+      unless sas_token.value.start_with?('?')
+        conn = "BlobEndpoint=https://#{storageaccount}.#{dns_suffix};SharedAccessSignature=#{sas_token.value}"
+      else
+        conn = sas_token.value
+      end
+    end
+    unless conn.nil?
+      @blob_client = Azure::Storage::Blob::BlobService.create_from_connection_string(conn)
+    else
+      # unless use_development_storage?
+      @blob_client = Azure::Storage::Blob::BlobService.create(
+        storage_account_name: storageaccount,
+        storage_dns_suffix: dns_suffix,
+        storage_access_key: access_key.value,
+      )
+      # else
+      #   @logger.info("not yet implemented")
+      # end
+    end
+  end
+
   def full_read(filename)
-    return @blob_client.get_blob(container, filename)[1]
+    tries ||= 2
+    begin
+      return @blob_client.get_blob(container, filename)[1]
+    rescue Exception => e
+      @logger.error("caught: #{e.message} for full_read")
+      if (tries -= 1) > 0
+        if e.message == "Connection reset by peer"
+          connect
+        end
+        retry
+      end
+    end
+    begin
+      chuck = @blob_client.get_blob(container, filename)[1]
+    end
+    return chuck
   end

   def partial_read_json(filename, offset, length)
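For reference, this is the shape of the connection string the new connect method assembles when a SAS token is supplied without a leading '?'. The account name, DNS suffix and token below are invented placeholders, not values from the plugin.

```ruby
# Hypothetical values for illustration only; the real plugin takes these from
# its storageaccount, dns_suffix and sas_token options.
storageaccount = "examplestorage"
dns_suffix     = "blob.core.windows.net"
sas_token      = "sv=2020-08-04&ss=b&srt=co&sp=rl&sig=EXAMPLE"

# Same interpolation as in connect: blob endpoint plus shared access signature.
conn = "BlobEndpoint=https://#{storageaccount}.#{dns_suffix};SharedAccessSignature=#{sas_token}"
puts conn
# => BlobEndpoint=https://examplestorage.blob.core.windows.net;SharedAccessSignature=sv=2020-08-04&ss=b&srt=co&sp=rl&sig=EXAMPLE
```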
@@ -475,6 +512,7 @@ def save_registry(filelist)


   def learn_encapsulation
+    @logger.info("learn_encapsulation, this can be skipped by setting skip_learning => true. Or set both file_head and file_tail")
     # From one file, read first block and last block to learn head and tail
     begin
       blobs = @blob_client.list_blobs(container, { max_results: 3, prefix: @prefix })