Skip to content

fix alignment in readSourceFileToEndAlloc #24518

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions lib/std/unicode.zig
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ pub const Utf16LeIterator = struct {
bytes: []const u8,
i: usize,

pub fn init(s: []const u16) Utf16LeIterator {
pub fn init(s: []align(1) const u16) Utf16LeIterator {
return Utf16LeIterator{
.bytes = mem.sliceAsBytes(s),
.i = 0,
Expand Down Expand Up @@ -917,7 +917,8 @@ test fmtUtf8 {

fn utf16LeToUtf8ArrayListImpl(
result: *std.ArrayList(u8),
utf16le: []const u16,
comptime alignment: std.mem.Alignment,
utf16le: []align(alignment.toByteUnits()) const u16,
comptime surrogates: Surrogates,
) (switch (surrogates) {
.cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
Expand Down Expand Up @@ -969,7 +970,7 @@ pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;

/// Appends to `result` by delegating to `utf16LeToUtf8ArrayListImpl` with
/// `.cannot_encode_surrogate_half`, after reserving `utf16le.len` bytes of
/// unused capacity in `result`.
/// Errors are reported as `Utf16LeToUtf8AllocError`.
pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
// Reserve one byte per input code unit before converting.
try result.ensureUnusedCapacity(utf16le.len);
// NOTE(review): this view lists both sides of the change without +/- markers.
// The three-argument call looks like the removed line and the `.of(u16)` call
// like its replacement (it matches the impl signature that takes a comptime
// alignment) — confirm against the raw diff.
return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);
return utf16LeToUtf8ArrayListImpl(result, .of(u16), utf16le, .cannot_encode_surrogate_half);
}

/// Caller must free returned memory.
Expand All @@ -978,17 +979,26 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
errdefer result.deinit();

try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
try utf16LeToUtf8ArrayListImpl(&result, .of(u16), utf16le, .cannot_encode_surrogate_half);
return result.toOwnedSlice();
}

/// Forwards to `alignedUtf16LeToUtf8AllocZ`, passing `.of(u16)` as the
/// alignment of `utf16le`, and returns its 0-terminated result.
/// Caller must free returned memory.
pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {
return alignedUtf16LeToUtf8AllocZ(allocator, .of(u16), utf16le);
}

/// Variant of `utf16LeToUtf8AllocZ` whose input slice carries an explicit,
/// comptime-known pointer alignment instead of a fixed one.
///
/// `alignment` is the alignment of the `utf16le` slice; the parameter type is
/// `[]align(alignment.toByteUnits()) const u16`.
/// The conversion itself is delegated to `utf16LeToUtf8ArrayListImpl` with
/// `.cannot_encode_surrogate_half`.
/// Returns a slice terminated by a 0 sentinel; errors are reported as
/// `Utf16LeToUtf8AllocError`.
/// Caller must free returned memory.
pub fn alignedUtf16LeToUtf8AllocZ(
allocator: mem.Allocator,
comptime alignment: mem.Alignment,
utf16le: []align(alignment.toByteUnits()) const u16,
) Utf16LeToUtf8AllocError![:0]u8 {
// optimistically guess that it will all be ascii (and allocate space for the null terminator)
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
// Release the partially filled buffer if the conversion below fails.
errdefer result.deinit();

// NOTE(review): this view lists both sides of the change without +/- markers.
// The call without `alignment` looks like the removed line and the call that
// forwards `alignment` like its replacement — confirm against the raw diff.
try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
try utf16LeToUtf8ArrayListImpl(&result, alignment, utf16le, .cannot_encode_surrogate_half);
return result.toOwnedSliceSentinel(0);
}

Expand Down Expand Up @@ -1752,7 +1762,7 @@ pub const Wtf8Iterator = struct {

/// Appends to `result` by delegating to `utf16LeToUtf8ArrayListImpl` with
/// `.can_encode_surrogate_half`, after reserving `utf16le.len` bytes of
/// unused capacity in `result`.
/// The declared error set is `mem.Allocator.Error` only.
pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
// Reserve one byte per input code unit before converting.
try result.ensureUnusedCapacity(utf16le.len);
// NOTE(review): this view lists both sides of the change without +/- markers.
// The three-argument call looks like the removed line and the `.of(u16)` call
// like its replacement (it matches the impl signature that takes a comptime
// alignment) — confirm against the raw diff.
return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half);
return utf16LeToUtf8ArrayListImpl(result, .of(u16), utf16le, .can_encode_surrogate_half);
}

/// Caller must free returned memory.
Expand All @@ -1761,7 +1771,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Al
var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len);
errdefer result.deinit();

try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
try utf16LeToUtf8ArrayListImpl(&result, .of(u16), wtf16le, .can_encode_surrogate_half);
return result.toOwnedSlice();
}

Expand All @@ -1771,7 +1781,7 @@ pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.A
var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1);
errdefer result.deinit();

try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
try utf16LeToUtf8ArrayListImpl(&result, .of(u16), wtf16le, .can_encode_surrogate_half);
return result.toOwnedSliceSentinel(0);
}

Expand Down Expand Up @@ -1979,7 +1989,7 @@ pub const Wtf16LeIterator = struct {
bytes: []const u8,
i: usize,

pub fn init(s: []const u16) Wtf16LeIterator {
pub fn init(s: []align(1) const u16) Wtf16LeIterator {
return Wtf16LeIterator{
.bytes = mem.sliceAsBytes(s),
.i = 0,
Expand Down
6 changes: 3 additions & 3 deletions lib/std/zig.zig
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ test isUnderscore {
}

pub fn readSourceFileToEndAlloc(gpa: Allocator, file_reader: *std.fs.File.Reader) ![:0]u8 {
var buffer: std.ArrayListAlignedUnmanaged(u8, .@"2") = .empty;
var buffer: std.ArrayListUnmanaged(u8) = .empty;
defer buffer.deinit(gpa);

if (file_reader.getSize()) |size| {
Expand All @@ -543,7 +543,7 @@ pub fn readSourceFileToEndAlloc(gpa: Allocator, file_reader: *std.fs.File.Reader
try buffer.ensureTotalCapacityPrecise(gpa, casted_size + 1);
} else |_| {}

try file_reader.interface.appendRemaining(gpa, .@"2", &buffer, .limited(max_src_size));
try file_reader.interface.appendRemaining(gpa, null, &buffer, .limited(max_src_size));

// Detect unsupported file types with their Byte Order Mark
const unsupported_boms = [_][]const u8{
Expand All @@ -560,7 +560,7 @@ pub fn readSourceFileToEndAlloc(gpa: Allocator, file_reader: *std.fs.File.Reader
// If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
if (std.mem.startsWith(u8, buffer.items, "\xff\xfe")) {
if (buffer.items.len % 2 != 0) return error.InvalidEncoding;
return std.unicode.utf16LeToUtf8AllocZ(gpa, @ptrCast(buffer.items)) catch |err| switch (err) {
return std.unicode.alignedUtf16LeToUtf8AllocZ(gpa, .@"1", @ptrCast(buffer.items)) catch |err| switch (err) {
error.DanglingSurrogateHalf => error.UnsupportedEncoding,
error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,
Expand Down