Skip to content

Prevent ConsumeNextToken from inlining into ConsumeNextTokenOrRollback #126659

Draft
EgorBo wants to merge 2 commits into dotnet:main from
EgorBo:arm64-narrow-unroll
Draft

Prevent ConsumeNextToken from inlining into ConsumeNextTokenOrRollback #126659
EgorBo wants to merge 2 commits into dotnet:main from
EgorBo:arm64-narrow-unroll

Conversation

@EgorBo
Copy link
Copy Markdown
Member

@EgorBo EgorBo commented Apr 8, 2026

testing some theories

Copilot AI review requested due to automatic review settings April 8, 2026 20:34
@EgorBo EgorBo marked this pull request as draft April 8, 2026 20:34
@EgorBo
Copy link
Copy Markdown
Member Author

EgorBo commented Apr 8, 2026

@EgorBot -linux_azure_arm -linux_aws_amd -linux_aws_arm -profiler

using System.Text;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point: hand the assembly that contains Benchmarks to BenchmarkDotNet's switcher
// and forward the command-line arguments to it.
BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(args);

/// <summary>
/// Benchmark that writes a mixed list of pre-built JSON strings and small dictionaries
/// through a reused <see cref="Utf8JsonWriter"/> into a reused <see cref="MemoryStream"/>.
/// </summary>
[MemoryDiagnoser]
public class Benchmarks
{
    // ── TokenSerialization fields ────────────────────────────────────────────
    private List<object> _tokenObjects;

    // Writer and backing stream are cached per thread so the measured method
    // only creates them on its first invocation on that thread.
    [ThreadStatic] private static Utf8JsonWriter t_writer;
    [ThreadStatic] private static MemoryStream t_stream;

    /// <summary>
    /// Builds the 200-entry input list: every entry whose index is a multiple of three is a
    /// JSON string from <see cref="GenerateRecordJson"/>, every other entry is a dictionary
    /// holding an int, a string and a 100-byte array.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // TokenSerialization
        _tokenObjects = new List<object>(200);
        for (int i = 0; i < 200; i++)
        {
            if (i % 3 == 0)
            {
                _tokenObjects.Add(GenerateRecordJson(1));
            }
            else
            {
                _tokenObjects.Add(new Dictionary<string, object>
                {
                    ["seq"] = i,
                    ["label"] = $"item_{i}",
                    ["blob"] = new byte[100]
                });
            }
        }
    }

    /// <summary>
    /// Builds a JSON object describing a "product" record whose "Fields" array is filled
    /// with generated field entries.
    /// </summary>
    /// <param name="targetSizeKb">
    /// Approximate size budget in KiB. Field entries are appended while the text built so far
    /// is shorter than <c>targetSizeKb * 1024 - 512</c> characters.
    /// </param>
    /// <returns>The generated JSON text.</returns>
    private static string GenerateRecordJson(int targetSizeKb = 150)
    {
        int targetBytes = targetSizeKb * 1024;
        var sb = new StringBuilder(targetBytes + 512);

        sb.Append('{');
        sb.Append("\"TypeName\":\"product\",");
        sb.Append("\"CategoryCode\":1,");
        sb.Append("\"Label\":\"Product\",");
        sb.Append("\"IsAction\":false,");
        sb.Append("\"IsActionMember\":false,");
        sb.Append("\"IsTrackingEnabled\":true,");
        sb.Append("\"IsAvailableLocal\":true,");
        sb.Append("\"IsChildRecord\":false,");
        sb.Append("\"IsLinksEnabled\":true,");
        sb.Append("\"IsCustomRecord\":false,");
        sb.Append("\"PrimaryKeyField\":\"productid\",");
        sb.Append("\"PrimaryLabelField\":\"title\",");
        sb.Append("\"Fields\":[");

        int fieldIndex = 0;
        while (sb.Length < targetBytes - 512)
        {
            // Every entry after the first is preceded by a separator.
            if (fieldIndex > 0)
            {
                sb.Append(',');
            }

            sb.Append('{');
            sb.Append($"\"TypeName\":\"field_{fieldIndex}\",");
            sb.Append($"\"InternalName\":\"Field_{fieldIndex}\",");
            sb.Append("\"FieldType\":\"String\",");
            sb.Append($"\"Label\":\"Field {fieldIndex}\",");
            sb.Append("\"MaxSize\":100,");
            sb.Append("\"IsReadable\":true,");
            sb.Append("\"IsCreatable\":true,");
            sb.Append("\"IsUpdatable\":true,");
            sb.Append("\"IsTrackingEnabled\":false,");
            sb.Append("\"IsPrimaryKey\":false,");
            sb.Append("\"IsVirtual\":false,");
            sb.Append("\"Requirement\":\"None\"");
            sb.Append('}');
            fieldIndex++;
        }

        sb.Append(']');
        sb.Append('}');
        return sb.ToString();
    }

    /// <summary>
    /// Writes <c>{"Catalog":[ ... ]}</c> to the cached stream. String tokens are emitted with
    /// <see cref="Utf8JsonWriter.WriteRawValue(string, bool)"/>; dictionary tokens are written
    /// as objects whose values go through <see cref="JsonSerializer"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Nothing was written to the stream.</exception>
    [Benchmark]
    public void TokenSerialization()
    {
        // Rewind and truncate the cached stream so each invocation starts empty.
        var stream = t_stream ??= new MemoryStream(64 * 1024);
        stream.Position = 0;
        stream.SetLength(0);

        var writer = t_writer;
        if (writer is null)
        {
            writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = true });
            t_writer = writer;
        }
        else
        {
            writer.Reset(stream);
        }

        writer.WriteStartObject();
        writer.WriteStartArray("Catalog");
        foreach (var token in _tokenObjects)
        {
            if (token is string strToken)
            {
                if (!string.IsNullOrEmpty(strToken))
                {
                    writer.WriteRawValue(strToken);
                }
            }
            else if (token is Dictionary<string, object> dictToken)
            {
                writer.WriteStartObject();
                foreach (var kvp in dictToken)
                {
                    writer.WritePropertyName(kvp.Key);
                    JsonSerializer.Serialize(writer, kvp.Value);
                }
                writer.WriteEndObject();
            }
        }
        writer.WriteEndArray();
        writer.WriteEndObject();
        writer.Flush();

        // Sanity check so a run that produced no output fails loudly.
        if (stream.Length == 0)
        {
            throw new InvalidOperationException("unreachable");
        }
    }
}

@dotnet-policy-service
Copy link
Copy Markdown
Contributor

Tagging subscribers to this area: @dotnet/area-system-text-encoding
See info in area-owners.md if you want to be subscribed.

Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

This PR improves Ascii.NarrowUtf16ToAscii throughput on ARM64 by unrolling the NEON (Vector128) inner loop to process 32 UTF-16 chars per iteration, aiming to better utilize modern ARM64 cores’ dual vector execution resources.

Changes:

  • Add a 2× unrolled ARM SIMD loop that loads 4× Vector128<ushort> (32 chars), checks for non-ASCII once, then stores 2× Vector128<byte> (32 bytes).
  • Convert the original do { ... } while (...) loop to a while (...) loop to avoid executing when the new unrolled loop fully drains the input.
  • Preserve the existing 1× vector loop as the fallback for remaining elements / non-ASCII detection.

@EgorBo EgorBo closed this Apr 8, 2026
@EgorBo EgorBo reopened this Apr 8, 2026
@EgorBo EgorBo force-pushed the arm64-narrow-unroll branch from 34c2bdd to 9e27b05 Compare April 8, 2026 21:30
@EgorBo EgorBo changed the title Unroll ARM64 ASCII narrowing loop 2x in NarrowUtf16ToAscii Prevent ConsumeNextToken from inlining into ConsumeNextTokenOrRollback Apr 8, 2026
@EgorBo

This comment was marked as outdated.

Copilot AI review requested due to automatic review settings April 8, 2026 22:33
@EgorBo
Copy link
Copy Markdown
Member Author

EgorBo commented Apr 8, 2026

@EgorBot -linux_azure_arm -arm

using System.Text;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point: hand the assembly that contains Benchmarks to BenchmarkDotNet's switcher
// and forward the command-line arguments to it.
BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(args);

/// <summary>
/// Benchmark that writes a mixed list of pre-built JSON strings and small dictionaries
/// through a reused <see cref="Utf8JsonWriter"/> into a reused <see cref="MemoryStream"/>.
/// </summary>
[MemoryDiagnoser]
public class Benchmarks
{
    // ── TokenSerialization fields ────────────────────────────────────────────
    private List<object> _tokenObjects;

    // Writer and backing stream are cached per thread so the measured method
    // only creates them on its first invocation on that thread.
    [ThreadStatic] private static Utf8JsonWriter t_writer;
    [ThreadStatic] private static MemoryStream t_stream;

    /// <summary>
    /// Builds the 200-entry input list: every entry whose index is a multiple of three is a
    /// JSON string from <see cref="GenerateRecordJson"/>, every other entry is a dictionary
    /// holding an int, a string and a 100-byte array.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // TokenSerialization
        _tokenObjects = new List<object>(200);
        for (int i = 0; i < 200; i++)
        {
            if (i % 3 == 0)
            {
                _tokenObjects.Add(GenerateRecordJson(1));
            }
            else
            {
                _tokenObjects.Add(new Dictionary<string, object>
                {
                    ["seq"] = i,
                    ["label"] = $"item_{i}",
                    ["blob"] = new byte[100]
                });
            }
        }
    }

    /// <summary>
    /// Builds a JSON object describing a "product" record whose "Fields" array is filled
    /// with generated field entries.
    /// </summary>
    /// <param name="targetSizeKb">
    /// Approximate size budget in KiB. Field entries are appended while the text built so far
    /// is shorter than <c>targetSizeKb * 1024 - 512</c> characters.
    /// </param>
    /// <returns>The generated JSON text.</returns>
    private static string GenerateRecordJson(int targetSizeKb = 150)
    {
        int targetBytes = targetSizeKb * 1024;
        var sb = new StringBuilder(targetBytes + 512);

        sb.Append('{');
        sb.Append("\"TypeName\":\"product\",");
        sb.Append("\"CategoryCode\":1,");
        sb.Append("\"Label\":\"Product\",");
        sb.Append("\"IsAction\":false,");
        sb.Append("\"IsActionMember\":false,");
        sb.Append("\"IsTrackingEnabled\":true,");
        sb.Append("\"IsAvailableLocal\":true,");
        sb.Append("\"IsChildRecord\":false,");
        sb.Append("\"IsLinksEnabled\":true,");
        sb.Append("\"IsCustomRecord\":false,");
        sb.Append("\"PrimaryKeyField\":\"productid\",");
        sb.Append("\"PrimaryLabelField\":\"title\",");
        sb.Append("\"Fields\":[");

        int fieldIndex = 0;
        while (sb.Length < targetBytes - 512)
        {
            // Every entry after the first is preceded by a separator.
            if (fieldIndex > 0)
            {
                sb.Append(',');
            }

            sb.Append('{');
            sb.Append($"\"TypeName\":\"field_{fieldIndex}\",");
            sb.Append($"\"InternalName\":\"Field_{fieldIndex}\",");
            sb.Append("\"FieldType\":\"String\",");
            sb.Append($"\"Label\":\"Field {fieldIndex}\",");
            sb.Append("\"MaxSize\":100,");
            sb.Append("\"IsReadable\":true,");
            sb.Append("\"IsCreatable\":true,");
            sb.Append("\"IsUpdatable\":true,");
            sb.Append("\"IsTrackingEnabled\":false,");
            sb.Append("\"IsPrimaryKey\":false,");
            sb.Append("\"IsVirtual\":false,");
            sb.Append("\"Requirement\":\"None\"");
            sb.Append('}');
            fieldIndex++;
        }

        sb.Append(']');
        sb.Append('}');
        return sb.ToString();
    }

    /// <summary>
    /// Writes <c>{"Catalog":[ ... ]}</c> to the cached stream. String tokens are emitted with
    /// <see cref="Utf8JsonWriter.WriteRawValue(string, bool)"/>; dictionary tokens are written
    /// as objects whose values go through <see cref="JsonSerializer"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Nothing was written to the stream.</exception>
    [Benchmark]
    public void TokenSerialization()
    {
        // Rewind and truncate the cached stream so each invocation starts empty.
        var stream = t_stream ??= new MemoryStream(64 * 1024);
        stream.Position = 0;
        stream.SetLength(0);

        var writer = t_writer;
        if (writer is null)
        {
            writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = true });
            t_writer = writer;
        }
        else
        {
            writer.Reset(stream);
        }

        writer.WriteStartObject();
        writer.WriteStartArray("Catalog");
        foreach (var token in _tokenObjects)
        {
            if (token is string strToken)
            {
                if (!string.IsNullOrEmpty(strToken))
                {
                    writer.WriteRawValue(strToken);
                }
            }
            else if (token is Dictionary<string, object> dictToken)
            {
                writer.WriteStartObject();
                foreach (var kvp in dictToken)
                {
                    writer.WritePropertyName(kvp.Key);
                    JsonSerializer.Serialize(writer, kvp.Value);
                }
                writer.WriteEndObject();
            }
        }
        writer.WriteEndArray();
        writer.WriteEndObject();
        writer.Flush();

        // Sanity check so a run that produced no output fails loudly.
        if (stream.Length == 0)
        {
            throw new InvalidOperationException("unreachable");
        }
    }
}

Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Copilot reviewed 2 out of 2 changed files in this pull request and generated 1 comment.

Comment on lines +13 to +16
/// <summary>IndexOfAny('"', '\', less than 32)</summary>
/// <remarks>https://tools.ietf.org/html/rfc8259</remarks>
private static readonly SearchValues<byte> s_controlQuoteBackslash = SearchValues.Create(
// Any Control, < 32 (' ')
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F"u8 +
// Quote
"\""u8 +
// Backslash
"\\"u8);
public static unsafe int IndexOfQuoteOrAnyControlOrBackSlash(this ReadOnlySpan<byte> span)
{
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR’s title/description focus on adding NoInlining to Utf8JsonReader.ConsumeNextToken, but this file also replaces the net8 implementation of IndexOfQuoteOrAnyControlOrBackSlash (SearchValues/Span.IndexOfAny) with a large custom unsafe+Vector scan. Please either (1) update the PR title/description to explain why this additional change is required and what perf/correctness data supports it, or (2) split it into a separate PR so the inlining change can be reviewed independently.

Copilot uses AI. Check for mistakes.
@EgorBo EgorBo force-pushed the arm64-narrow-unroll branch from 325cdb3 to 99fb05a Compare April 9, 2026 00:01
@EgorBo
Copy link
Copy Markdown
Member Author

EgorBo commented Apr 9, 2026

@EgorBot -arm -linux_arm

using System.Buffers;
using BenchmarkDotNet.Attributes;

/// <summary>
/// Micro-benchmark that looks for the first byte of a <see cref="SearchValues{T}"/> set
/// inside a short UTF-8 buffer.
/// </summary>
public class Benchmarks
{
    // Search set: the bytes 0x00-0x1F, then '"' and '\'.
    private static readonly SearchValues<byte> s_controlQuoteBackslash = SearchValues.Create(
        "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B"u8 +
        "\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018"u8 +
        "\u0019\u001A\u001B\u001C\u001D\u001E\u001F"u8 +
        "\""u8 +
        "\\"u8);

    // Input whose only byte from the search set is the quote at the very end.
    private byte[] _input = "Product description with some text that is a bit longer than usual\""u8.ToArray();

    /// <summary>Returns the index of the first byte of the input that is in the search set.</summary>
    [Benchmark]
    public int Medium()
    {
        var haystack = _input.AsSpan();
        return haystack.IndexOfAny(s_controlQuoteBackslash);
    }
}

Copilot AI review requested due to automatic review settings April 9, 2026 00:26
@EgorBo
Copy link
Copy Markdown
Member Author

EgorBo commented Apr 9, 2026

@EgorBot -arm -linux_arm

using System.Buffers;
using BenchmarkDotNet.Attributes;

/// <summary>
/// Micro-benchmark that looks for the first byte of a <see cref="SearchValues{T}"/> set
/// inside a short UTF-8 buffer.
/// </summary>
public class Benchmarks
{
    // Search set: the bytes 0x00-0x1F, then '"' and '\'.
    private static readonly SearchValues<byte> s_controlQuoteBackslash = SearchValues.Create(
        "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B"u8 +
        "\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018"u8 +
        "\u0019\u001A\u001B\u001C\u001D\u001E\u001F"u8 +
        "\""u8 +
        "\\"u8);

    // Input whose only byte from the search set is the quote at the very end.
    private byte[] _input = "Product description with some text that is a bit longer than usual\""u8.ToArray();

    /// <summary>Returns the index of the first byte of the input that is in the search set.</summary>
    [Benchmark]
    public int Medium()
    {
        var haystack = _input.AsSpan();
        return haystack.IndexOfAny(s_controlQuoteBackslash);
    }
}

Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Copilot reviewed 1 out of 1 changed files in this pull request and generated 4 comments.

ref byte currentSearchSpace = ref searchSpace;

if (searchSpaceLength < sizeof(ulong))
if (AdvSimd.IsSupported || searchSpaceLength < sizeof(ulong))
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The updated condition if (AdvSimd.IsSupported || searchSpaceLength < sizeof(ulong)) forces the scalar loop on ARM for all input lengths, bypassing the vectorized implementations below (including the existing AdvSimd code paths). This is likely a significant perf regression and is also inconsistent with similar methods in this file that gate on IsVectorizationSupported / length only. Consider reverting to the previous length-only check, or gating on !IsVectorizationSupported if the intent is to disable vectorization when no SIMD is available.

Suggested change
if (AdvSimd.IsSupported || searchSpaceLength < sizeof(ulong))
if (searchSpaceLength < sizeof(ulong))

Copilot uses AI. Check for mistakes.
if (AdvSimd.IsSupported && typeof(T) == typeof(byte))
{
ulong matches = AdvSimd.ShiftRightLogicalNarrowingLower(result.AsUInt16(), 4).AsUInt64().ToScalar();
return BitOperations.TrailingZeroCount(matches) >> 2 + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the AdvSimd fast path, operator precedence makes the return expression compute TrailingZeroCount(matches) >> (2 + offset) instead of (TrailingZeroCount(matches) >> 2) + offset, which will produce incorrect indices. Add parentheses so the shift and the addition are applied in the intended order.

Suggested change
return BitOperations.TrailingZeroCount(matches) >> 2 + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));
return (BitOperations.TrailingZeroCount(matches) >> 2) + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));

Copilot uses AI. Check for mistakes.
Comment on lines +1473 to +1478
if (AdvSimd.IsSupported && typeof(T) == typeof(byte))
{
ulong matches = AdvSimd.ShiftRightLogicalNarrowingLower(result.AsUInt16(), 4).AsUInt64().ToScalar();
return BitOperations.TrailingZeroCount(matches) >> 2 + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));
}

Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The AdvSimd branch computes matches via ShiftRightLogicalNarrowingLower(result.AsUInt16(), 4), which narrows 8x16-bit lanes to a 64-bit result. This transformation cannot represent all 16 byte lanes of the original Vector128<byte> (and AsUInt16() groups bytes into pairs), so a match in the upper half of the vector (or in the second byte of a pair) can yield an incorrect index. Unless there is a proven and correct packing scheme here, this path should either use the existing TNegator.ExtractMask(result) logic or implement a mask extraction that preserves per-byte lane positions across the full 128-bit vector.

Suggested change
if (AdvSimd.IsSupported && typeof(T) == typeof(byte))
{
ulong matches = AdvSimd.ShiftRightLogicalNarrowingLower(result.AsUInt16(), 4).AsUInt64().ToScalar();
return BitOperations.TrailingZeroCount(matches) >> 2 + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));
}

Copilot uses AI. Check for mistakes.
ref byte currentSearchSpace = ref searchSpace;

if (searchSpaceLength < sizeof(ulong))
if (AdvSimd.IsSupported || searchSpaceLength < sizeof(ulong))
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR title/description mention preventing ConsumeNextToken inlining, but the changes in this diff are in IndexOfAnyAsciiSearcher and alter SIMD/scalar behavior and index computation. Please update the PR metadata to match the actual change being made (or move these changes to the intended location) so reviewers and release notes aren’t misled.

Copilot uses AI. Check for mistakes.
@EgorBo EgorBo force-pushed the arm64-narrow-unroll branch from c243593 to 3e2e8d3 Compare April 9, 2026 08:59
@EgorBo
Copy link
Copy Markdown
Member Author

EgorBo commented Apr 9, 2026

@EgorBot -arm -aws_arm -linux_arm

using System.Text;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point: hand the assembly that contains Benchmarks to BenchmarkDotNet's switcher
// and forward the command-line arguments to it.
BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(args);

/// <summary>
/// Benchmark that writes a mixed list of pre-built JSON strings and small dictionaries
/// through a reused <see cref="Utf8JsonWriter"/> into a reused <see cref="MemoryStream"/>.
/// </summary>
[MemoryDiagnoser]
public class Benchmarks
{
    // ── TokenSerialization fields ────────────────────────────────────────────
    private List<object> _tokenObjects;

    // Writer and backing stream are cached per thread so the measured method
    // only creates them on its first invocation on that thread.
    [ThreadStatic] private static Utf8JsonWriter t_writer;
    [ThreadStatic] private static MemoryStream t_stream;

    /// <summary>
    /// Builds the 200-entry input list: every entry whose index is a multiple of three is a
    /// JSON string from <see cref="GenerateRecordJson"/>, every other entry is a dictionary
    /// holding an int, a string and a 100-byte array.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // TokenSerialization
        _tokenObjects = new List<object>(200);
        for (int i = 0; i < 200; i++)
        {
            if (i % 3 == 0)
            {
                _tokenObjects.Add(GenerateRecordJson(1));
            }
            else
            {
                _tokenObjects.Add(new Dictionary<string, object>
                {
                    ["seq"] = i,
                    ["label"] = $"item_{i}",
                    ["blob"] = new byte[100]
                });
            }
        }
    }

    /// <summary>
    /// Builds a JSON object describing a "product" record whose "Fields" array is filled
    /// with generated field entries.
    /// </summary>
    /// <param name="targetSizeKb">
    /// Approximate size budget in KiB. Field entries are appended while the text built so far
    /// is shorter than <c>targetSizeKb * 1024 - 512</c> characters.
    /// </param>
    /// <returns>The generated JSON text.</returns>
    private static string GenerateRecordJson(int targetSizeKb = 150)
    {
        int targetBytes = targetSizeKb * 1024;
        var sb = new StringBuilder(targetBytes + 512);

        sb.Append('{');
        sb.Append("\"TypeName\":\"product\",");
        sb.Append("\"CategoryCode\":1,");
        sb.Append("\"Label\":\"Product\",");
        sb.Append("\"IsAction\":false,");
        sb.Append("\"IsActionMember\":false,");
        sb.Append("\"IsTrackingEnabled\":true,");
        sb.Append("\"IsAvailableLocal\":true,");
        sb.Append("\"IsChildRecord\":false,");
        sb.Append("\"IsLinksEnabled\":true,");
        sb.Append("\"IsCustomRecord\":false,");
        sb.Append("\"PrimaryKeyField\":\"productid\",");
        sb.Append("\"PrimaryLabelField\":\"title\",");
        sb.Append("\"Fields\":[");

        int fieldIndex = 0;
        while (sb.Length < targetBytes - 512)
        {
            // Every entry after the first is preceded by a separator.
            if (fieldIndex > 0)
            {
                sb.Append(',');
            }

            sb.Append('{');
            sb.Append($"\"TypeName\":\"field_{fieldIndex}\",");
            sb.Append($"\"InternalName\":\"Field_{fieldIndex}\",");
            sb.Append("\"FieldType\":\"String\",");
            sb.Append($"\"Label\":\"Field {fieldIndex}\",");
            sb.Append("\"MaxSize\":100,");
            sb.Append("\"IsReadable\":true,");
            sb.Append("\"IsCreatable\":true,");
            sb.Append("\"IsUpdatable\":true,");
            sb.Append("\"IsTrackingEnabled\":false,");
            sb.Append("\"IsPrimaryKey\":false,");
            sb.Append("\"IsVirtual\":false,");
            sb.Append("\"Requirement\":\"None\"");
            sb.Append('}');
            fieldIndex++;
        }

        sb.Append(']');
        sb.Append('}');
        return sb.ToString();
    }

    /// <summary>
    /// Writes <c>{"Catalog":[ ... ]}</c> to the cached stream. String tokens are emitted with
    /// <see cref="Utf8JsonWriter.WriteRawValue(string, bool)"/>; dictionary tokens are written
    /// as objects whose values go through <see cref="JsonSerializer"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Nothing was written to the stream.</exception>
    [Benchmark]
    public void TokenSerialization()
    {
        // Rewind and truncate the cached stream so each invocation starts empty.
        var stream = t_stream ??= new MemoryStream(64 * 1024);
        stream.Position = 0;
        stream.SetLength(0);

        var writer = t_writer;
        if (writer is null)
        {
            writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = true });
            t_writer = writer;
        }
        else
        {
            writer.Reset(stream);
        }

        writer.WriteStartObject();
        writer.WriteStartArray("Catalog");
        foreach (var token in _tokenObjects)
        {
            if (token is string strToken)
            {
                if (!string.IsNullOrEmpty(strToken))
                {
                    writer.WriteRawValue(strToken);
                }
            }
            else if (token is Dictionary<string, object> dictToken)
            {
                writer.WriteStartObject();
                foreach (var kvp in dictToken)
                {
                    writer.WritePropertyName(kvp.Key);
                    JsonSerializer.Serialize(writer, kvp.Value);
                }
                writer.WriteEndObject();
            }
        }
        writer.WriteEndArray();
        writer.WriteEndObject();
        writer.Flush();

        // Sanity check so a run that produced no output fails loudly.
        if (stream.Length == 0)
        {
            throw new InvalidOperationException("unreachable");
        }
    }
}

Copilot AI review requested due to automatic review settings April 9, 2026 09:29
@EgorBo
Copy link
Copy Markdown
Member Author

EgorBo commented Apr 9, 2026

@EgorBot -arm -aws_arm -linux_arm

using System.Text;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point: hand the assembly that contains Benchmarks to BenchmarkDotNet's switcher
// and forward the command-line arguments to it.
BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(args);

/// <summary>
/// Benchmark that writes a mixed list of pre-built JSON strings and small dictionaries
/// through a reused <see cref="Utf8JsonWriter"/> into a reused <see cref="MemoryStream"/>.
/// </summary>
[MemoryDiagnoser]
public class Benchmarks
{
    // ── TokenSerialization fields ────────────────────────────────────────────
    private List<object> _tokenObjects;

    // Writer and backing stream are cached per thread so the measured method
    // only creates them on its first invocation on that thread.
    [ThreadStatic] private static Utf8JsonWriter t_writer;
    [ThreadStatic] private static MemoryStream t_stream;

    /// <summary>
    /// Builds the 200-entry input list: every entry whose index is a multiple of three is a
    /// JSON string from <see cref="GenerateRecordJson"/>, every other entry is a dictionary
    /// holding an int, a string and a 100-byte array.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // TokenSerialization
        _tokenObjects = new List<object>(200);
        for (int i = 0; i < 200; i++)
        {
            if (i % 3 == 0)
            {
                _tokenObjects.Add(GenerateRecordJson(1));
            }
            else
            {
                _tokenObjects.Add(new Dictionary<string, object>
                {
                    ["seq"] = i,
                    ["label"] = $"item_{i}",
                    ["blob"] = new byte[100]
                });
            }
        }
    }

    /// <summary>
    /// Builds a JSON object describing a "product" record whose "Fields" array is filled
    /// with generated field entries.
    /// </summary>
    /// <param name="targetSizeKb">
    /// Approximate size budget in KiB. Field entries are appended while the text built so far
    /// is shorter than <c>targetSizeKb * 1024 - 512</c> characters.
    /// </param>
    /// <returns>The generated JSON text.</returns>
    private static string GenerateRecordJson(int targetSizeKb = 150)
    {
        int targetBytes = targetSizeKb * 1024;
        var sb = new StringBuilder(targetBytes + 512);

        sb.Append('{');
        sb.Append("\"TypeName\":\"product\",");
        sb.Append("\"CategoryCode\":1,");
        sb.Append("\"Label\":\"Product\",");
        sb.Append("\"IsAction\":false,");
        sb.Append("\"IsActionMember\":false,");
        sb.Append("\"IsTrackingEnabled\":true,");
        sb.Append("\"IsAvailableLocal\":true,");
        sb.Append("\"IsChildRecord\":false,");
        sb.Append("\"IsLinksEnabled\":true,");
        sb.Append("\"IsCustomRecord\":false,");
        sb.Append("\"PrimaryKeyField\":\"productid\",");
        sb.Append("\"PrimaryLabelField\":\"title\",");
        sb.Append("\"Fields\":[");

        int fieldIndex = 0;
        while (sb.Length < targetBytes - 512)
        {
            // Every entry after the first is preceded by a separator.
            if (fieldIndex > 0)
            {
                sb.Append(',');
            }

            sb.Append('{');
            sb.Append($"\"TypeName\":\"field_{fieldIndex}\",");
            sb.Append($"\"InternalName\":\"Field_{fieldIndex}\",");
            sb.Append("\"FieldType\":\"String\",");
            sb.Append($"\"Label\":\"Field {fieldIndex}\",");
            sb.Append("\"MaxSize\":100,");
            sb.Append("\"IsReadable\":true,");
            sb.Append("\"IsCreatable\":true,");
            sb.Append("\"IsUpdatable\":true,");
            sb.Append("\"IsTrackingEnabled\":false,");
            sb.Append("\"IsPrimaryKey\":false,");
            sb.Append("\"IsVirtual\":false,");
            sb.Append("\"Requirement\":\"None\"");
            sb.Append('}');
            fieldIndex++;
        }

        sb.Append(']');
        sb.Append('}');
        return sb.ToString();
    }

    /// <summary>
    /// Writes <c>{"Catalog":[ ... ]}</c> to the cached stream. String tokens are emitted with
    /// <see cref="Utf8JsonWriter.WriteRawValue(string, bool)"/>; dictionary tokens are written
    /// as objects whose values go through <see cref="JsonSerializer"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Nothing was written to the stream.</exception>
    [Benchmark]
    public void TokenSerialization()
    {
        // Rewind and truncate the cached stream so each invocation starts empty.
        var stream = t_stream ??= new MemoryStream(64 * 1024);
        stream.Position = 0;
        stream.SetLength(0);

        var writer = t_writer;
        if (writer is null)
        {
            writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = true });
            t_writer = writer;
        }
        else
        {
            writer.Reset(stream);
        }

        writer.WriteStartObject();
        writer.WriteStartArray("Catalog");
        foreach (var token in _tokenObjects)
        {
            if (token is string strToken)
            {
                if (!string.IsNullOrEmpty(strToken))
                {
                    writer.WriteRawValue(strToken);
                }
            }
            else if (token is Dictionary<string, object> dictToken)
            {
                writer.WriteStartObject();
                foreach (var kvp in dictToken)
                {
                    writer.WritePropertyName(kvp.Key);
                    JsonSerializer.Serialize(writer, kvp.Value);
                }
                writer.WriteEndObject();
            }
        }
        writer.WriteEndArray();
        writer.WriteEndObject();
        writer.Flush();

        // Sanity check so a run that produced no output fails loudly.
        if (stream.Length == 0)
        {
            throw new InvalidOperationException("unreachable");
        }
    }
}

Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Copilot reviewed 1 out of 1 changed files in this pull request and generated 2 comments.

Comment on lines +27 to +49
// Fast path for " in the first 16 bytes
if (Vector128.IsHardwareAccelerated && span.Length >= 16)
{
ref byte ptr = ref MemoryMarshal.GetReference(span);
Vector128<byte> matches = Vector128.Equals(Vector128.LoadUnsafe(ref ptr), Vector128.Create((byte)'"'));
if (AdvSimd.IsSupported)
{
// TODO: use Vector128.IndexOf for both AdvSimd and Sse2 once
ulong mask = AdvSimd.ShiftRightLogicalNarrowingLower(matches.AsUInt16(), 4).AsUInt64().ToScalar();
if (mask != 0)
{
return BitOperations.TrailingZeroCount(mask) >> 2;
}
}
else
{
uint mask = matches.ExtractMostSignificantBits();
if (mask != 0)
{
return BitOperations.TrailingZeroCount(mask);
}
}
}
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new Vector128 fast-path only searches for the quote character ("), but this helper’s contract (and its callers) require returning the first occurrence of any of: quote, backslash, or any control character (< 0x20). If a backslash or control char appears before the closing quote within the first 16 bytes, this will now incorrectly return the quote index and skip required validation/escape handling. Please either remove this fast-path or extend it to compute the earliest index across all three conditions before returning.

Suggested change
// Fast path for " in the first 16 bytes
if (Vector128.IsHardwareAccelerated && span.Length >= 16)
{
ref byte ptr = ref MemoryMarshal.GetReference(span);
Vector128<byte> matches = Vector128.Equals(Vector128.LoadUnsafe(ref ptr), Vector128.Create((byte)'"'));
if (AdvSimd.IsSupported)
{
// TODO: use Vector128.IndexOf for both AdvSimd and Sse2 once
ulong mask = AdvSimd.ShiftRightLogicalNarrowingLower(matches.AsUInt16(), 4).AsUInt64().ToScalar();
if (mask != 0)
{
return BitOperations.TrailingZeroCount(mask) >> 2;
}
}
else
{
uint mask = matches.ExtractMostSignificantBits();
if (mask != 0)
{
return BitOperations.TrailingZeroCount(mask);
}
}
}

Copilot uses AI. Check for mistakes.
Comment on lines 24 to +28
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOfQuoteOrAnyControlOrBackSlash(this ReadOnlySpan<byte> span) =>
span.IndexOfAny(s_controlQuoteBackslash);
public static int IndexOfQuoteOrAnyControlOrBackSlash(this ReadOnlySpan<byte> span)
{
// Fast path for " in the first 16 bytes
if (Vector128.IsHardwareAccelerated && span.Length >= 16)
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR title mentions preventing ConsumeNextToken from inlining into ConsumeNextTokenOrRollback, but the change here is a new SIMD fast-path inside IndexOfQuoteOrAnyControlOrBackSlash. If the intent is to affect inlining decisions, please document that relationship here (or adjust the PR title/description) so reviewers can understand why low-level string scanning logic is being changed for that purpose.

Copilot uses AI. Check for mistakes.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants