Skip to content

Commit a017e04

Browse files
committed
I got bored and rewrote the validation step.
It should be a lot easier to grok what's going on now, and there are some minor performance gains.
1 parent 2d70bec commit a017e04

File tree

8 files changed

+291
-266
lines changed

8 files changed

+291
-266
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# 2.1.1 (Future)
2+
3+
## Bugfixes
4+
- Deserializing into a `PhpObjectDictionary` with an integer key failed with an invalid cast exception. [#43](https://github.com/StringEpsilon/PhpSerializerNET/issues/43).
5+
6+
## Regular changes
7+
- Some minor performance improvements in the validation step of deserialization.
8+
19
# 2.1.0 (2025-03-07)
210

311
## Features

PhpSerializerNET.Test/Deserialize/Validation/TestArrayValidation.cs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ This Source Code Form is subject to the terms of the Mozilla Public
44
file, You can obtain one at http://mozilla.org/MPL/2.0/.
55
**/
66

7+
using System.Collections.Generic;
78
using Xunit;
89

910
namespace PhpSerializerNET.Test.Deserialize.Validation;
@@ -14,11 +15,21 @@ public class TestArrayValidation {
1415
[InlineData("a:-1:{};", "Array at position 2 has illegal, missing or malformed length.")]
1516
[InlineData("a:100:};", "Unexpected token at index 6. Expected '{' but found '}' instead.")]
1617
[InlineData("a:10000 ", "Array at position 7 has illegal, missing or malformed length.")]
18+
[InlineData("a:10000", "Unexpected end of input. Expected ':' at index 7, but input ends at index 6")]
1719
[InlineData("a:10000:", "Unexpected end of input. Expected '{' at index 8, but input ends at index 7")]
18-
[InlineData("a:1000000", "Unexpected token at index 8. Expected ':' but found '0' instead.")]
1920
[InlineData("a:2:{i:0;i:0;i:1;i:1;i:2;i:2;}", "Array at position 0 should be of length 2, but actual length is 3 or more.")]
2021
public void ThrowsOnMalformedArray(string input, string exceptionMessage) {
2122
var ex = Assert.Throws<DeserializationException>(() => PhpSerialization.Deserialize(input));
2223
Assert.Equal(exceptionMessage, ex.Message);
2324
}
25+
26+
[Fact]
public void AllowsValidArray() {
    // A well-formed array of 24 entries with consecutive integer keys must pass validation
    // and is expected to come back as a List<object> holding all 24 values.
    var result = PhpSerialization.Deserialize("a:24:{i:0;i:2147483647;i:1;i:2147483647;i:2;i:2147483647;i:3;i:2147483647;i:4;i:2147483647;i:5;i:2147483647;i:6;i:2147483647;i:7;i:2147483647;i:8;i:2147483647;i:9;i:2147483647;i:10;i:2147483647;i:11;i:2147483647;i:12;i:2147483647;i:13;i:2147483647;i:14;i:2147483647;i:15;i:2147483647;i:16;i:2147483647;i:17;i:2147483647;i:18;i:2147483647;i:19;i:2147483647;i:20;i:2147483647;i:21;i:2147483647;i:22;i:2147483647;i:23;i:2147483647;}");

    // IsAssignableFrom fails with a descriptive message for a wrong type or a null result
    // (the previous hand-rolled check dereferenced result and would have thrown on null),
    // and hands back the typed list so no second cast is needed.
    var list = Assert.IsAssignableFrom<List<object>>(result);
    Assert.Equal(24, list.Count);
}
2435
}

PhpSerializerNET.Test/Deserialize/Validation/TestDoubleValidation.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public class TestDoubleValidation {
1111
[Theory]
1212
[InlineData("d", "Unexpected end of input. Expected ':' at index 1, but input ends at index 0")]
1313
[InlineData("b ", "Unexpected token at index 1. Expected ':' but found ' ' instead.")]
14-
[InlineData("d:111111", "Unexpected end of input. Expected ':' at index 7, but input ends at index 7")]
14+
[InlineData("d:111111", "Unexpected end of input. Expected ';' at index 8, but input ends at index 7")]
1515
[InlineData("d:bgg5;", "Unexpected token at index 2. 'b' is not a valid part of a floating point number.")]
1616
[InlineData("d:;", "Unexpected token at index 2: Expected floating point number, but found ';' instead.")]
1717
public void ThrowsOnMalformedDouble(string input, string exceptionMessage) {

PhpSerializerNET.Test/Deserialize/Validation/TestIntegerValidation.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public class TestIntegerValidation {
1414
[InlineData("i:NaN;", "Unexpected token at index 2. 'N' is not a valid part of a number.")]
1515
[InlineData("i:12345b:;", "Unexpected token at index 7. 'b' is not a valid part of a number.")]
1616
[InlineData("i:12345.;", "Unexpected token at index 7. '.' is not a valid part of a number.")]
17-
[InlineData("i:12345", "Unexpected end of input. Expected ':' at index 6, but input ends at index 6")]
17+
[InlineData("i:12345", "Unexpected end of input. Expected ';' at index 7, but input ends at index 6")]
1818
public void ThrowsOnMalformedInteger(string input, string exceptionMessage) {
1919
var exception = Assert.Throws<DeserializationException>(() => {
2020
PhpSerialization.Deserialize(input);

PhpSerializerNET.Test/Deserialize/Validation/TestReferenceValidation.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace PhpSerializerNET.Test.Deserialize.Validation;
1111
public class TestReferenceValidation {
1212
[Theory]
1313
[InlineData("r", "Unexpected end of input. Expected ':' at index 1, but input ends at index 0")]
14-
[InlineData("r:1", "Unexpected token at index 2: Expected number, but found ';' instead.")]
14+
[InlineData("r:1", "Unexpected end of input. Expected ';' at index 3, but input ends at index 2")]
1515
[InlineData("r:1;", "Invalid reference: '1' can not be resolved.")]
1616
public void ThrowsOnInvalidReference(string input, string exceptionMessage) {
1717
var ex = Assert.Throws<DeserializationException>(() => PhpSerialization.Deserialize(input));
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
using System;
2+
using System.Runtime.CompilerServices;
3+
using PhpSerializerNET;
4+
5+
/// <summary>
/// Checks that a byte span holds well-formed PHP-serialized data.
/// </summary>
/// <remarks>
/// Every <c>Visit*</c> helper is entered with <c>position</c> on the type token of the value it
/// checks and, on success, leaves <c>position</c> on the first byte after that value.
/// Malformed input - including input that ends in the middle of a value - is reported by
/// throwing a <see cref="DeserializationException"/>.
/// </remarks>
internal static class FormatValidator {
    /// <summary>
    /// Validates the complete input, which must consist of exactly one serialized value.
    /// </summary>
    /// <param name="input">The serialized data.</param>
    /// <returns>
    /// The number of values visited. Every scalar, every array or object, and every key and
    /// value nested inside them counts as one.
    /// </returns>
    /// <exception cref="DeserializationException">
    /// The input is malformed, truncated, or has trailing bytes after the first value.
    /// </exception>
    internal static int Validate(in ReadOnlySpan<byte> input) {
        int count = 0;
        int position = 0;
        Validate(input, ref count, ref position);
        if (input.Length > position) {
            throw new DeserializationException($"Unexpected token '{(char)input[position]}' at position {position}.");
        }
        return count;
    }

    /// <summary>
    /// Validates the single value starting at <paramref name="position"/> and increments
    /// <paramref name="count"/> once for it (nested values are counted by the recursion).
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void Validate(ReadOnlySpan<byte> input, ref int count, ref int position) {
        // A value is required here. Without this guard, truncated input such as "a:1:{i:0;"
        // would surface as an IndexOutOfRangeException instead of a DeserializationException.
        if (position >= input.Length) {
            throw new DeserializationException(
                $"Unexpected end of input. Expected a value at index {position}, but input ends at index {input.Length-1}"
            );
        }
        switch (input[position]) {
            case (byte)'r':
            case (byte)'R':
                VisitReference(input, ref position);
                break;
            case (byte)'b':
                VisitBoolean(input, ref position);
                break;
            case (byte)'N':
                VisitNull(input, ref position);
                break;
            case (byte)'s':
                VisitString(input, ref position);
                break;
            case (byte)'i':
                VisitInteger(input, ref position);
                break;
            case (byte)'d':
                VisitDouble(input, ref position);
                break;
            case (byte)'a':
                VisitArray(input, ref count, ref position);
                break;
            case (byte)'O':
                VisitObject(input, ref count, ref position);
                break;
            default:
                throw new DeserializationException(
                    $"Unexpected token '{(char)input[position]}' at position {position}."
                );
        }
        count++;
    }

    /// <summary>
    /// Requires the byte at <paramref name="position"/> to equal <paramref name="token"/>
    /// and steps past it.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void VisitToken(ReadOnlySpan<byte> input, byte token, ref int position) {
        if (input.Length <= position) {
            throw new DeserializationException(
                $"Unexpected end of input. Expected '{(char)token}' at index {position}, but input ends at index {position-1}"
            );
        }
        if (input[position] != token) {
            throw new DeserializationException(
                $"Unexpected token at index {position}. Expected '{(char)token}' but found '{(char)input[position]}' instead."
            );
        }
        position++;
    }

    /// <summary>
    /// Reads the run of decimal digits starting at <paramref name="position"/> as a length.
    /// Stops on (without consuming) the terminating ':' or at the end of the input.
    /// </summary>
    /// <param name="input">The serialized data.</param>
    /// <param name="dataType">Name of the data type, used only for the error message.</param>
    /// <param name="position">Current read position; advanced past the digits.</param>
    /// <returns>The parsed length; 0 when no digits are present.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int VisitLength(ReadOnlySpan<byte> input, string dataType, ref int position) {
        int length = 0;
        for (; position < input.Length; position++) {
            switch (input[position]) {
                case (byte)':':
                    return length;
                case >= (byte)'0' and <= (byte)'9':
                    int digit = input[position] - (byte)'0';
                    // Reject values that do not fit into an int. Unchecked arithmetic would
                    // wrap around, possibly to a negative length, which would then move the
                    // caller's position backwards.
                    if (length > (int.MaxValue - digit) / 10) {
                        throw new DeserializationException(
                            $"{dataType} at position {position} has illegal, missing or malformed length."
                        );
                    }
                    length = length * 10 + digit;
                    break;
                default:
                    throw new DeserializationException(
                        $"{dataType} at position {position} has illegal, missing or malformed length."
                    );
            }
        }
        // End of input: the caller's VisitToken(':') reports the missing delimiter.
        return length;
    }

    /// <summary>
    /// Skips the characters of an integer literal and stops on (without consuming) the
    /// terminating ';'.
    /// </summary>
    /// <remarks>
    /// NOTE(review): '+' and '-' are accepted at any position, so "1-2" passes this check.
    /// Presumably the later parsing step rejects such values - confirm.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void VisitDigits(ReadOnlySpan<byte> input, ref int position) {
        // Input such as "i:" ends right after the ':'; guard the lookahead below.
        if (position >= input.Length) {
            throw new DeserializationException(
                $"Unexpected end of input. Expected number at index {position}, but input ends at index {input.Length-1}"
            );
        }
        if (input[position] == (byte)';') {
            throw new DeserializationException(
                $"Unexpected token at index {position}: Expected number, but found ';' instead."
            );
        }
        for (; position < input.Length; position++) {
            switch (input[position]) {
                case (byte)';':
                    return;
                case (byte)'+':
                case (byte)'-':
                case >= (byte)'0' and <= (byte)'9':
                    break;
                default:
                    throw new DeserializationException(
                        $"Unexpected token at index {position}. '{(char)input[position]}' is not a valid part of a number."
                    );
            }
        }
        // Edge case: input ends here without a delimiter following:
        throw new DeserializationException(
            $"Unexpected end of input. Expected ';' at index {position}, but input ends at index {input.Length-1}"
        );
    }

    /// <summary>Validates a reference token ('r' or 'R').</summary>
    private static void VisitReference(in ReadOnlySpan<byte> input, ref int position) {
        // r:1234;
        position++; // ":1234;"
        VisitToken(input, (byte)':', ref position); // "1234;"
        VisitDigits(input, ref position); // ";"
        VisitToken(input, (byte)';', ref position); // ""
    }

    /// <summary>
    /// Validates a floating point token. Besides digits, sign, '.' and exponent markers, the
    /// letters 'I', 'N', 'F' and 'A' are allowed so that infinity and NaN values pass.
    /// </summary>
    private static void VisitDouble(in ReadOnlySpan<byte> input, ref int position) {
        // d:12.34;
        position++; // ":12.34;"
        VisitToken(input, (byte)':', ref position); // "12.34;"
        // Input such as "d:" ends right after the ':'; guard the lookahead below.
        if (position >= input.Length) {
            throw new DeserializationException(
                $"Unexpected end of input. Expected floating point number at index {position}, but input ends at index {input.Length-1}"
            );
        }
        if (input[position] == (byte)';') {
            throw new DeserializationException(
                $"Unexpected token at index {position}: Expected floating point number, but found ';' instead."
            );
        }
        for (; position < input.Length; position++) {
            switch (input[position]) {
                case (byte)';':
                    // Unlike VisitDigits, the delimiter is consumed right here.
                    position++;
                    return;
                case (byte)'+':
                case (byte)'.':
                case (byte)'-':
                case (byte)'E' or (byte)'e': // exponents.
                case (byte)'I' or (byte)'F': // infinity.
                case (byte)'N' or (byte)'A': // NaN.
                case >= (byte)'0' and <= (byte)'9':
                    break;
                default:
                    throw new DeserializationException(
                        $"Unexpected token at index {position}. '{(char)input[position]}' is not a valid part of a floating point number."
                    );
            }
        }
        // Edge case: input ends here without a delimiter following:
        throw new DeserializationException(
            $"Unexpected end of input. Expected ';' at index {position}, but input ends at index {input.Length-1}"
        );
    }

    /// <summary>Validates an integer token.</summary>
    private static void VisitInteger(in ReadOnlySpan<byte> input, ref int position) {
        // i:1234;
        position++; // ":1234;"
        VisitToken(input, (byte)':', ref position); // "1234;"
        VisitDigits(input, ref position); // ";"
        VisitToken(input, (byte)';', ref position); // ""
    }

    /// <summary>
    /// Validates a string token. The content itself is skipped using the declared length;
    /// only the surrounding quotes and delimiters are checked.
    /// </summary>
    private static void VisitString(in ReadOnlySpan<byte> input, ref int position) {
        // s:11:"Hello World";
        position++; // ':11:"Hello World";'
        VisitToken(input, (byte)':', ref position); // '11:"Hello World";'
        int length = VisitLength(input, "String", ref position); // ':"Hello World";'
        VisitToken(input, (byte)':', ref position); // '"Hello World";'
        VisitToken(input, (byte)'"', ref position); // 'Hello World";'
        // Compared as a difference (position <= input.Length holds here) so that a huge
        // declared length cannot overflow the sum and slip past the check.
        if (length >= input.Length - position) {
            throw new DeserializationException(
                $"Illegal length of {length}. The string at position {position} points to out of bounds index {(long)position + length}."
            );
        }
        position += length; // '";'
        VisitToken(input, (byte)'"', ref position); // ';'
        VisitToken(input, (byte)';', ref position); // ''
    }

    /// <summary>Validates a null token.</summary>
    private static void VisitNull(in ReadOnlySpan<byte> input, ref int position) {
        // 'N;'
        position++; // ';'
        VisitToken(input, (byte)';', ref position); // ''
    }

    /// <summary>Validates a boolean token, whose value must be '0' or '1'.</summary>
    private static void VisitBoolean(in ReadOnlySpan<byte> input, ref int position) {
        // 'b:0;'
        position++; // ':0;'
        VisitToken(input, (byte)':', ref position); // '0;'
        if (position >= input.Length) {
            throw new DeserializationException(
                $"Unexpected end of input. Expected '0' or '1' at index {position}, but input ends at index {input.Length-1}"
            );
        }
        if (input[position] != (byte)'0' && input[position] != (byte)'1') {
            throw new DeserializationException(
                $"Unexpected token in boolean at index {position}. "
                + $"Expected either '1' or '0', but found '{(char)input[position]}' instead."
            );
        }
        position++; // ';'
        VisitToken(input, (byte)';', ref position); // ''
    }

    /// <summary>
    /// Validates an array token including all nested keys and values. Fails when the array
    /// holds more key/value pairs than its declared length.
    /// </summary>
    /// <remarks>
    /// NOTE(review): an array with fewer pairs than declared is not rejected here -
    /// confirm whether that is intended.
    /// </remarks>
    private static void VisitArray(in ReadOnlySpan<byte> input, ref int count, ref int position) {
        // 'a:2:{i:0;b:1;i:1;b:2;}'
        int arrayStart = position;
        position++; // ':2:{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)':', ref position); // '2:{i:0;b:1;i:1;b:2;}'
        int length = VisitLength(input, "Array", ref position); // ':{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)':', ref position); // '{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)'{', ref position); // 'i:0;b:1;i:1;b:2;}'
        int i = 0;
        // The bounds check ends the loop on truncated input (e.g. "a:1:{"), so that the
        // VisitToken('}') below reports the missing brace.
        while (position < input.Length && input[position] != (byte)'}') {
            Validate(input, ref count, ref position); // key:   'b:1;i:1;b:2;}'
            Validate(input, ref count, ref position); // value: 'i:1;b:2;}'
            i++;
            if (i > length) {
                throw new DeserializationException(
                    $"Array at position {arrayStart} should be of length {length}, " +
                    $"but actual length is {i} or more."
                );
            }
        }
        // '}'
        VisitToken(input, (byte)'}', ref position);
    }

    /// <summary>
    /// Validates an object token: the class name (skipped using its declared length), the
    /// property count and all nested property names and values. Fails when the object holds
    /// more properties than declared.
    /// </summary>
    /// <remarks>
    /// NOTE(review): an object with fewer properties than declared is not rejected here -
    /// confirm whether that is intended.
    /// </remarks>
    private static void VisitObject(in ReadOnlySpan<byte> input, ref int count, ref int position) {
        // 'O:8:"stdClass":2:{i:0;b:1;i:1;b:2;}'
        int objectStart = position;
        position++; // ':8:"stdClass":2:{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)':', ref position); // '8:"stdClass":2:{i:0;b:1;i:1;b:2;}'
        int nameLength = VisitLength(input, "Object", ref position); // ':"stdClass":2:{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)':', ref position); // '"stdClass":2:{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)'"', ref position); // 'stdClass":2:{i:0;b:1;i:1;b:2;}'
        // Compared as a difference (position <= input.Length holds here) so that a huge
        // declared length cannot overflow the sum and slip past the check.
        if (nameLength >= input.Length - position) {
            throw new DeserializationException(
                $"Illegal length of {nameLength}. The string at position {position} points to out of bounds index {(long)position + nameLength}."
            );
        }
        position += nameLength; // '":2:{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)'"', ref position); // ':2:{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)':', ref position); // '2:{i:0;b:1;i:1;b:2;}'
        int length = VisitLength(input, "Object", ref position); // ':{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)':', ref position); // '{i:0;b:1;i:1;b:2;}'
        VisitToken(input, (byte)'{', ref position); // 'i:0;b:1;i:1;b:2;}'
        int i = 0;
        // The bounds check ends the loop on truncated input, so that the VisitToken('}')
        // below reports the missing brace.
        while (position < input.Length && input[position] != (byte)'}') {
            Validate(input, ref count, ref position); // name:  'b:1;i:1;b:2;}'
            Validate(input, ref count, ref position); // value: 'i:1;b:2;}'
            i++;
            if (i > length) {
                throw new DeserializationException(
                    $"Object at position {objectStart} should have {length} properties, " +
                    $"but actually has {i} or more properties."
                );
            }
        }
        // '}'
        VisitToken(input, (byte)'}', ref position);
    }
}

0 commit comments

Comments
 (0)