Skip to content

Commit bde94ac

Browse files
Ensure unicode characters are interpreted as desired (closes #20)
1 parent 3424729 commit bde94ac

File tree

4 files changed

+52
-25
lines changed

4 files changed

+52
-25
lines changed

analytics/shred.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,14 @@ package analytics
1515

1616
import (
1717
"fmt"
18+
"github.com/pkg/errors"
1819
"regexp"
1920
"strings"
2021
"unicode" // For camel to snake case - consider alternative?
2122

2223
jsoniter "github.com/json-iterator/go"
23-
"github.com/pkg/errors"
2424
)
2525

26-
var ConfigCompatibleWithStandardLibrary = jsoniter.Config{EscapeHTML: false}
27-
var json = jsoniter.ConfigCompatibleWithStandardLibrary
28-
2926
type SelfDescribingData struct {
3027
Schema string
3128
Data map[string]interface{} // TODO: See if leaving data as a string or byte array would work, and would be faster.
@@ -103,7 +100,7 @@ func fixSchema(prefix string, schemaUri string) (string, error) {
103100
func shredContexts(contexts string) ([]KeyVal, error) {
104101
ctxts := Contexts{}
105102

106-
err := json.Unmarshal([]byte(contexts), &ctxts)
103+
err := jsoniter.Unmarshal([]byte(contexts), &ctxts)
107104
if err != nil {
108105
return nil, errors.Wrap(err, "Error unmarshaling context JSON")
109106
}
@@ -137,7 +134,7 @@ func shredUnstruct(unstruct string) ([]KeyVal, error) {
137134

138135
event := UnstructEvent{}
139136

140-
err := json.Unmarshal([]byte(unstruct), &event)
137+
err := jsoniter.Unmarshal([]byte(unstruct), &event)
141138
if err != nil {
142139
return nil, errors.Wrap(err, "Error unmarshaling unstruct event JSON")
143140
}

analytics/transform.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"strings"
2020
"time"
2121

22+
jsoniter "github.com/json-iterator/go"
2223
"github.com/pkg/errors"
2324
)
2425

@@ -27,6 +28,8 @@ const (
2728
EmptyFieldErr string = `Field is empty`
2829
)
2930

31+
var json = jsoniter.Config{}.Froze()
32+
3033
type KeyVal struct {
3134
Key string
3235
Value interface{}

analytics/transform_test.go

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
package analytics
1515

1616
import (
17-
stdJson "encoding/json" // Using the std JSON package for expected values
1817
"testing"
1918

19+
jsoniter "github.com/json-iterator/go"
2020
"github.com/stretchr/testify/assert"
2121
)
2222

@@ -201,14 +201,12 @@ func BenchmarkMapifyGoodEvent(b *testing.B) {
201201
func TestToJson(t *testing.T) {
202202
assert := assert.New(t)
203203

204-
// correct value
205-
jsonEvent, err := stdJson.Marshal(eventMapWithoutGeo)
206-
if err != nil {
207-
}
208-
209204
jsonifiedEvent, err := fullEvent.ToJson()
210205
assert.Nil(err)
211-
assert.Equal(jsonEvent, jsonifiedEvent)
206+
207+
jsonifiedEventOrdered, _ := orderJson(jsonifiedEvent)
208+
209+
assert.Equal(eventMapWithoutGeoJSON, jsonifiedEventOrdered)
212210

213211
// incorrect input
214212
failedJsonify, err := ParsedEvent([]string{"one", "two"}).ToJson()
@@ -222,17 +220,30 @@ func BenchmarkToJson(b *testing.B) {
222220
}
223221
}
224222

225-
func TestToJsonWithGeo(t *testing.T) {
226-
assert := assert.New(t)
227-
228-
// correct value
229-
jsonEvent, err := stdJson.Marshal(eventMapWithGeo)
223+
// orderJson uses the jsoniter library to remarshal a []byte, resulting in a sorted JSON
224+
func orderJson(bytes []byte) ([]byte, error) {
225+
var ifce interface{}
226+
j := jsoniter.Config{SortMapKeys: true}.Froze()
227+
err := j.Unmarshal(bytes, &ifce)
228+
if err != nil {
229+
return []byte{}, err
230+
}
231+
output, err := j.Marshal(ifce)
230232
if err != nil {
233+
return []byte{}, err
231234
}
235+
return output, nil
236+
}
237+
238+
func TestToJsonWithGeo(t *testing.T) {
239+
assert := assert.New(t)
232240

233241
jsonifiedEvent, err := fullEvent.ToJsonWithGeo()
234242
assert.Nil(err)
235-
assert.Equal(jsonEvent, jsonifiedEvent)
243+
244+
jsonifiedEventOrdered, _ := orderJson(jsonifiedEvent)
245+
246+
assert.Equal(eventMapWithGeoJSON, jsonifiedEventOrdered)
236247

237248
// incorrect input
238249
failedJsonify, err := ParsedEvent([]string{"one", "two"}).ToJsonWithGeo()
@@ -330,6 +341,10 @@ func TestGetUnstructEventValue(t *testing.T) {
330341
unstructValue, err = fullEvent.GetUnstructEventValue(`elementId`)
331342
assert.Nil(err)
332343
assert.Equal(`exampleLink`, unstructValue)
344+
345+
unstructValue, err = fullEvent.GetUnstructEventValue(`unicodeTest`)
346+
assert.Nil(err)
347+
assert.Equal(`<>angry_birds`, unstructValue)
333348
}
334349

335350
func TestGetContextValue(t *testing.T) {
@@ -394,12 +409,19 @@ func TestGetSubsetJSON(t *testing.T) {
394409

395410
// correct value
396411
subsetJsonValue, err := fullEvent.GetSubsetJson([]string{"app_id", "br_features_flash", "br_features_pdf", "collector_tstamp", "unstruct_event", "contexts", "derived_contexts"}...)
397-
assert.Equal(subsetJson, subsetJsonValue)
412+
413+
subsetJsonOrdered, _ := orderJson(subsetJson)
414+
subsetJsonValueOrdered, _ := orderJson(subsetJsonValue)
415+
416+
assert.Equal(subsetJsonOrdered, subsetJsonValueOrdered)
398417
assert.Nil(err)
399418

400419
// correct values passing multiple string args
401420
subsetJsonValue2, err := fullEvent.GetSubsetJson("app_id", "br_features_flash", "br_features_pdf", "collector_tstamp", "unstruct_event", "contexts", "derived_contexts")
402-
assert.Equal(subsetJson, subsetJsonValue2)
421+
422+
subsetJsonValue2Ordered, _ := orderJson(subsetJsonValue2)
423+
424+
assert.Equal(subsetJsonOrdered, subsetJsonValue2Ordered)
403425
assert.Nil(err)
404426

405427
// incorrect field name
@@ -408,7 +430,7 @@ func TestGetSubsetJSON(t *testing.T) {
408430
assert.NotNil(err)
409431

410432
// empty value
411-
emptyJson, _ := stdJson.Marshal(make(map[string]interface{}))
433+
emptyJson, _ := jsoniter.Marshal(make(map[string]interface{}))
412434
emptyValue, err := fullEvent.GetSubsetJson("ti_name")
413435
assert.Equal(emptyJson, emptyValue)
414436
assert.Nil(err)

analytics/vars_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@
1414
package analytics
1515

1616
import (
17-
stdJson "encoding/json"
1817
"strings"
1918
"time"
19+
20+
jsoniter "github.com/json-iterator/go"
2021
)
2122

2223
var ctxt = `{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.acme/test_context/jsonschema/1-0-0","data":{"field1": 1}}, {"schema":"iglu:com.acme/test_context/jsonschema/1-0-0","data":{"field1": 2}}]}`
@@ -27,7 +28,7 @@ var invalidUnstruct = `{"data":{"data":{"key":"value"},"schema":"fail"},"schema"
2728

2829
var tstampValue, _ = time.Parse("2006-01-02 15:04:05.999", "2013-11-26 00:03:57.885")
2930

30-
var unstructString = `{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1","data":{"targetUrl":"http://www.example.com","elementClasses":["foreground"],"elementId":"exampleLink"}}}`
31+
var unstructString = `{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1","data":{"targetUrl":"http://www.example.com","elementClasses":["foreground"],"elementId":"exampleLink","unicodeTest":"<>angry_birds"}}}`
3132

3233
var contextsString = `{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:org.schema/WebPage/jsonschema/1-0-0","data":{"genre":"blog","inLanguage":"en-US","datePublished":"2014-11-06T00:00:00Z","author":"Fred Blundun","breadcrumb":["blog","releases"],"keywords":["snowplow","javascript","tracker","event"]}},{"schema":"iglu:org.w3/PerformanceTiming/jsonschema/1-0-0","data":{"navigationStart":1415358089861,"unloadEventStart":1415358090270,"unloadEventEnd":1415358090287,"redirectStart":0,"redirectEnd":0,"fetchStart":1415358089870,"domainLookupStart":1415358090102,"domainLookupEnd":1415358090102,"connectStart":1415358090103,"connectEnd":1415358090183,"requestStart":1415358090183,"responseStart":1415358090265,"responseEnd":1415358090265,"domLoading":1415358090270,"domInteractive":1415358090886,"domContentLoadedEventStart":1415358090968,"domContentLoadedEventEnd":1415358091309,"domComplete":0,"loadEventStart":0,"loadEventEnd":0}}]}`
3334

@@ -171,6 +172,9 @@ var fullEvent = ParsedEvent([]string{
171172
// tsv string
172173
var tsvEvent = strings.Join(fullEvent, "\t")
173174

175+
var eventMapWithGeoJSON = []byte(`{"app_id":"<>angry-birds","br_features_flash":false,"br_features_pdf":true,"collector_tstamp":"2013-11-26T00:03:57.885Z","contexts_com_snowplowanalytics_snowplow_ua_parser_context_1":[{"deviceFamily":"Other","osFamily":"Windows XP","osMajor":null,"osMinor":null,"osPatch":null,"osPatchMinor":null,"osVersion":"Windows XP","useragentFamily":"IE","useragentMajor":"7","useragentMinor":"0","useragentPatch":null,"useragentVersion":"IE 7.0"}],"contexts_org_schema_web_page_1":[{"author":"Fred Blundun","breadcrumb":["blog","releases"],"datePublished":"2014-11-06T00:00:00Z","genre":"blog","inLanguage":"en-US","keywords":["snowplow","javascript","tracker","event"]}],"contexts_org_w3_performance_timing_1":[{"connectEnd":1415358090183,"connectStart":1415358090103,"domComplete":0,"domContentLoadedEventEnd":1415358091309,"domContentLoadedEventStart":1415358090968,"domInteractive":1415358090886,"domLoading":1415358090270,"domainLookupEnd":1415358090102,"domainLookupStart":1415358090102,"fetchStart":1415358089870,"loadEventEnd":0,"loadEventStart":0,"navigationStart":1415358089861,"redirectEnd":0,"redirectStart":0,"requestStart":1415358090183,"responseEnd":1415358090265,"responseStart":1415358090265,"unloadEventEnd":1415358090287,"unloadEventStart":1415358090270}],"derived_tstamp":"2013-11-26T00:03:57.885Z","domain_sessionid":"2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1","domain_sessionidx":3,"domain_userid":"bc2e92ec6c204a14","dvce_created_tstamp":"2013-11-26T00:03:57.885Z","etl_tstamp":"2013-11-26T00:03:57.885Z","event":"page_view","event_fingerprint":"e3dbfa9cca0412c3d4052863cefb547f","event_format":"jsonschema","event_id":"c6ef3124-b53a-4b13-a233-0088f79dcbcb","event_name":"link_click","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","geo_city":"New York","geo_country":"US","geo_latitude":37.443604,"geo_location":"37.443604,-122.4124","geo_longitude":-122.4124,"geo_region":"TX","geo_region_name":"Florida","geo_zipcode":"94109","ip_domain":"nuvox.net","ip_isp":"FDN Communications","ip_netspeed":"Cable/DSL","ip_organization":"Bouygues Telecom","name_tracker":"cloudfront-1","network_userid":"ecdff4d0-9175-40ac-a8bb-325c49733607","page_title":"On Analytics","page_url":"http://www.snowplowanalytics.com","page_urlfragment":"4-conclusion","page_urlhost":"www.snowplowanalytics.com","page_urlpath":"/product/index.html","page_urlport":80,"page_urlquery":"id=GTM-DLRG","page_urlscheme":"http","platform":"web","true_tstamp":"2013-11-26T00:03:57.885Z","txn_id":41828,"unstruct_event_com_snowplowanalytics_snowplow_link_click_1":{"elementClasses":["foreground"],"elementId":"exampleLink","targetUrl":"http://www.example.com","unicodeTest":"<>angry_birds"},"user_fingerprint":"2161814971","user_id":"[email protected]","user_ipaddress":"92.231.54.234","v_collector":"clj-tomcat-0.1.0","v_etl":"serde-0.5.2","v_tracker":"js-2.1.0"}`)
176+
var eventMapWithoutGeoJSON = []byte(`{"app_id":"<>angry-birds","br_features_flash":false,"br_features_pdf":true,"collector_tstamp":"2013-11-26T00:03:57.885Z","contexts_com_snowplowanalytics_snowplow_ua_parser_context_1":[{"deviceFamily":"Other","osFamily":"Windows XP","osMajor":null,"osMinor":null,"osPatch":null,"osPatchMinor":null,"osVersion":"Windows XP","useragentFamily":"IE","useragentMajor":"7","useragentMinor":"0","useragentPatch":null,"useragentVersion":"IE 7.0"}],"contexts_org_schema_web_page_1":[{"author":"Fred Blundun","breadcrumb":["blog","releases"],"datePublished":"2014-11-06T00:00:00Z","genre":"blog","inLanguage":"en-US","keywords":["snowplow","javascript","tracker","event"]}],"contexts_org_w3_performance_timing_1":[{"connectEnd":1415358090183,"connectStart":1415358090103,"domComplete":0,"domContentLoadedEventEnd":1415358091309,"domContentLoadedEventStart":1415358090968,"domInteractive":1415358090886,"domLoading":1415358090270,"domainLookupEnd":1415358090102,"domainLookupStart":1415358090102,"fetchStart":1415358089870,"loadEventEnd":0,"loadEventStart":0,"navigationStart":1415358089861,"redirectEnd":0,"redirectStart":0,"requestStart":1415358090183,"responseEnd":1415358090265,"responseStart":1415358090265,"unloadEventEnd":1415358090287,"unloadEventStart":1415358090270}],"derived_tstamp":"2013-11-26T00:03:57.885Z","domain_sessionid":"2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1","domain_sessionidx":3,"domain_userid":"bc2e92ec6c204a14","dvce_created_tstamp":"2013-11-26T00:03:57.885Z","etl_tstamp":"2013-11-26T00:03:57.885Z","event":"page_view","event_fingerprint":"e3dbfa9cca0412c3d4052863cefb547f","event_format":"jsonschema","event_id":"c6ef3124-b53a-4b13-a233-0088f79dcbcb","event_name":"link_click","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","geo_city":"New York","geo_country":"US","geo_latitude":37.443604,"geo_longitude":-122.4124,"geo_region":"TX","geo_region_name":"Florida","geo_zipcode":"94109","ip_domain":"nuvox.net","ip_isp":"FDN Communications","ip_netspeed":"Cable/DSL","ip_organization":"Bouygues Telecom","name_tracker":"cloudfront-1","network_userid":"ecdff4d0-9175-40ac-a8bb-325c49733607","page_title":"On Analytics","page_url":"http://www.snowplowanalytics.com","page_urlfragment":"4-conclusion","page_urlhost":"www.snowplowanalytics.com","page_urlpath":"/product/index.html","page_urlport":80,"page_urlquery":"id=GTM-DLRG","page_urlscheme":"http","platform":"web","true_tstamp":"2013-11-26T00:03:57.885Z","txn_id":41828,"unstruct_event_com_snowplowanalytics_snowplow_link_click_1":{"elementClasses":["foreground"],"elementId":"exampleLink","targetUrl":"http://www.example.com","unicodeTest":"<>angry_birds"},"user_fingerprint":"2161814971","user_id":"[email protected]","user_ipaddress":"92.231.54.234","v_collector":"clj-tomcat-0.1.0","v_etl":"serde-0.5.2","v_tracker":"js-2.1.0"}`)
177+
174178
var eventMapWithGeo = map[string]interface{}{
175179
"app_id": "<>angry-birds",
176180
"br_features_flash": false,
@@ -262,6 +266,7 @@ var eventMapWithGeo = map[string]interface{}{
262266
"elementClasses": []interface{}{"foreground"},
263267
"elementId": "exampleLink",
264268
"targetUrl": "http://www.example.com",
269+
"unicodeTest": "<>angry_birds",
265270
},
266271
"user_fingerprint": "2161814971",
267272
"user_id": "[email protected]",
@@ -302,4 +307,4 @@ var subsetMap = map[string]interface{}{
302307
"contexts_com_snowplowanalytics_snowplow_ua_parser_context_1": eventMapWithGeo["contexts_com_snowplowanalytics_snowplow_ua_parser_context_1"],
303308
}
304309

305-
var subsetJson, _ = stdJson.Marshal(subsetMap)
310+
var subsetJson, _ = jsoniter.Marshal(subsetMap)

0 commit comments

Comments
 (0)