-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Vectorize String.IndexOf(char) and String.LastIndexOf(char) #16392
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,10 @@ | |
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Globalization; | ||
using System.Numerics; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
using Internal.Runtime.CompilerServices; | ||
|
||
namespace System | ||
{ | ||
|
@@ -63,24 +66,35 @@ public int IndexOf(char value, StringComparison comparisonType) | |
|
||
case StringComparison.OrdinalIgnoreCase: | ||
return CompareInfo.Invariant.IndexOf(this, value, CompareOptions.OrdinalIgnoreCase); | ||
|
||
default: | ||
throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); | ||
} | ||
} | ||
|
||
public unsafe int IndexOf(char value, int startIndex, int count) | ||
{ | ||
if (startIndex < 0 || startIndex > Length) | ||
if ((uint)startIndex > (uint)Length) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Wouldn't this cause a problem, or throw a different exception, if startIndex is passed as a negative value? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nope.
We have such checks everywhere. And #16392 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if someone calling this API inside unchecked block would result in undesired results I guess: for example In reply to: 171733137 [](ancestors = 171733137) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is not true if using unchecked blocks. In reply to: 171733480 [](ancestors = 171733480) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. never mind, the value would be just 2. I think this will work I guess In reply to: 171734390 [](ancestors = 171734390,171733480) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. never mind, the value would be just 2. I think this will work I guess. could you add some comment mentioning the limits and how this is safe? In reply to: 171733980 [](ancestors = 171733980,171733137) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Are you suggesting adding a comment everywhere we do this? We have several Span/Memory APIs that all do this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. at least adding the comment on the newly introduced code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are hundreds of places in coreclr/corefx that use this idiom. IMHO, I do not think that the comment is needed. |
||
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); | ||
|
||
if (count < 0 || count > Length - startIndex) | ||
if ((uint)count > (uint)(Length - startIndex)) | ||
throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: reduce the comparisons in the bounds checks:
|
||
fixed (char* pChars = &_firstChar) | ||
{ | ||
char* pCh = pChars + startIndex; | ||
char* pEndCh = pCh + count; | ||
|
||
if (Vector.IsHardwareAccelerated && count >= Vector<ushort>.Count * 2) | ||
{ | ||
unchecked | ||
{ | ||
const int elementsPerByte = sizeof(ushort) / sizeof(byte); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: PascalCasing for constants |
||
int unaligned = ((int)pCh & (Vector<byte>.Count - 1)) / elementsPerByte; | ||
count = ((Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1)); | ||
} | ||
} | ||
SequentialScan: | ||
while (count >= 4) | ||
{ | ||
if (*pCh == value) goto ReturnIndex; | ||
|
@@ -101,6 +115,34 @@ public unsafe int IndexOf(char value, int startIndex, int count) | |
pCh++; | ||
} | ||
|
||
if (pCh < pEndCh) | ||
{ | ||
count = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1)); | ||
// Get comparison Vector | ||
Vector<ushort> vComparison = new Vector<ushort>(value); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: use var |
||
while (count > 0) | ||
{ | ||
var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned<Vector<ushort>>(pCh)); | ||
if (Vector<ushort>.Zero.Equals(vMatches)) | ||
{ | ||
pCh += Vector<ushort>.Count; | ||
count -= Vector<ushort>.Count; | ||
continue; | ||
} | ||
// Find offset of first match | ||
return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); | ||
} | ||
|
||
if (pCh < pEndCh) | ||
{ | ||
unchecked | ||
{ | ||
count = (int)(pEndCh - pCh); | ||
} | ||
goto SequentialScan; | ||
} | ||
} | ||
|
||
return -1; | ||
|
||
ReturnIndex3: pCh++; | ||
|
@@ -111,6 +153,43 @@ public unsafe int IndexOf(char value, int startIndex, int count) | |
} | ||
} | ||
|
||
// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138 | ||
// Returns the char offset, within the vector, of the first non-zero element of `match`. | ||
// `match` is the per-char comparison mask; the visible caller only passes a mask that is | ||
// not equal to Vector<ushort>.Zero. If every lane is zero the loop below finishes with | ||
// i == Vector<ulong>.Count and candidate == 0, and the value returned is not a valid offset. | ||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
private static int LocateFirstFoundChar(Vector<ushort> match) | ||
{ | ||
// Reinterpret the mask as 64-bit lanes so that four chars are tested per comparison. | ||
var vector64 = Vector.AsVectorUInt64(match); | ||
ulong candidate = 0; | ||
int i = 0; | ||
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001 | ||
for (; i < Vector<ulong>.Count; i++) | ||
{ | ||
candidate = vector64[i]; | ||
if (candidate != 0) | ||
{ | ||
break; | ||
} | ||
} | ||
|
||
// Each 64-bit lane holds four chars, hence i * 4 plus the char position inside the lane. | ||
// Single LEA instruction with jitted const (using function result) | ||
return i * 4 + LocateFirstFoundChar(candidate); | ||
} | ||
|
||
// Returns the position (0..3) of the lowest 16-bit char lane in `match` that is set. | ||
// NOTE(review): the multiply/shift below only yields a lane index when the lowest set bit | ||
// of `match` is bit 0 of its lane or lies in lane 0 -- e.g. every lane is 0x0000 or 0xFFFF, | ||
// which is what the Vector.Equals mask passed by the Vector<ushort> overload should | ||
// contain -- confirm before calling with any other kind of value. | ||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
private static int LocateFirstFoundChar(ulong match) | ||
{ | ||
unchecked | ||
{ | ||
// Flag least significant power of two bit: for a non-zero match this sets every bit | ||
// from bit 0 up to and including the lowest set bit, and clears all bits above it. | ||
var powerOfTwoFlag = match ^ (match - 1); | ||
// Multiply (wrapping, hence unchecked) so that the lane index ends up in the bits | ||
// from bit 49 upward, then shift it down to extract it. | ||
return (int)((powerOfTwoFlag * XorPowerOfTwoToHighChar) >> 49); | ||
} | ||
} | ||
|
||
// Multiplier used by LocateFirstFoundChar(ulong); the expression evaluates to | ||
// 0x0000_0001_0002_0004 (0x01 << 32 | 0x02 << 16 | 0x03, plus 1). | ||
private const ulong XorPowerOfTwoToHighChar = (0x03ul | | ||
0x02ul << 16 | | ||
0x01ul << 32) + 1; | ||
|
||
// Returns the index of the first occurrence of any specified character in the current instance. | ||
// The search starts at startIndex and runs to startIndex + count - 1. | ||
// | ||
|
@@ -397,17 +476,27 @@ public unsafe int LastIndexOf(char value, int startIndex, int count) | |
if (Length == 0) | ||
return -1; | ||
|
||
if (startIndex < 0 || startIndex >= Length) | ||
if ((uint)startIndex >= (uint)Length) | ||
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); | ||
|
||
if (count < 0 || count - 1 > startIndex) | ||
if ((uint)count > (uint)startIndex + 1) | ||
throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); | ||
|
||
fixed (char* pChars = &_firstChar) | ||
{ | ||
char* pCh = pChars + startIndex; | ||
char* pEndCh = pCh - count; | ||
|
||
//We search [startIndex..EndIndex] | ||
if (Vector.IsHardwareAccelerated && count >= Vector<ushort>.Count * 2) | ||
{ | ||
unchecked | ||
{ | ||
const int elementsPerByte = sizeof(ushort) / sizeof(byte); | ||
count = (((int)pCh & (Vector<byte>.Count - 1)) / elementsPerByte) + 1; | ||
} | ||
} | ||
SequentialScan: | ||
while (count >= 4) | ||
{ | ||
if (*pCh == value) goto ReturnIndex; | ||
|
@@ -428,6 +517,35 @@ public unsafe int LastIndexOf(char value, int startIndex, int count) | |
pCh--; | ||
} | ||
|
||
if (pCh > pEndCh) | ||
{ | ||
count = (int)((pCh - pEndCh) & ~(Vector<ushort>.Count - 1)); | ||
|
||
// Get comparison Vector | ||
Vector<ushort> vComparison = new Vector<ushort>(value); | ||
while (count > 0) | ||
{ | ||
char* pStart = pCh - Vector<ushort>.Count + 1; | ||
var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned<Vector<ushort>>(pStart)); | ||
if (Vector<ushort>.Zero.Equals(vMatches)) | ||
{ | ||
pCh -= Vector<ushort>.Count; | ||
count -= Vector<ushort>.Count; | ||
continue; | ||
} | ||
// Find offset of last match | ||
return (int)(pStart - pChars) + LocateLastFoundChar(vMatches); | ||
} | ||
|
||
if (pCh > pEndCh) | ||
{ | ||
unchecked | ||
{ | ||
count = (int)(pCh - pEndCh); | ||
} | ||
goto SequentialScan; | ||
} | ||
} | ||
return -1; | ||
|
||
ReturnIndex3: pCh--; | ||
|
@@ -438,6 +556,40 @@ public unsafe int LastIndexOf(char value, int startIndex, int count) | |
} | ||
} | ||
|
||
// Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138 | ||
// Returns the char offset, within the vector, of the last non-zero element of `match`. | ||
// `match` is the per-char comparison mask; the visible caller only passes a mask that is | ||
// not equal to Vector<ushort>.Zero. | ||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
private static int LocateLastFoundChar(Vector<ushort> match) | ||
{ | ||
// Reinterpret the mask as 64-bit lanes so that four chars are tested per comparison. | ||
var vector64 = Vector.AsVectorUInt64(match); | ||
ulong candidate = 0; | ||
// Walk the 64-bit lanes from the highest index down to find the last non-zero one. | ||
int i = Vector<ulong>.Count - 1; | ||
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001 | ||
for (; i >= 0; i--) | ||
{ | ||
candidate = vector64[i]; | ||
if (candidate != 0) | ||
{ | ||
break; | ||
} | ||
} | ||
|
||
// Each 64-bit lane holds four chars, hence i * 4 plus the char position inside the lane. | ||
// Single LEA instruction with jitted const (using function result) | ||
return i * 4 + LocateLastFoundChar(candidate); | ||
} | ||
|
||
// Returns the position (0..3) of the most significant 16-bit char lane in `match` whose | ||
// top bit is set. Returns 3 without looping when `match` is 0. | ||
// NOTE(review): presumably every lane is 0x0000 or 0xFFFF (a Vector.Equals mask); a lane | ||
// with low bits set but its top bit clear is not detected by the sign test below -- confirm. | ||
[MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
private static int LocateLastFoundChar(ulong match) | ||
{ | ||
// Find the most significant char that has its highest bit set | ||
// A 64-bit value holds four 16-bit chars, numbered 0 (least significant) to 3 (most | ||
// significant), so the search starts at lane 3 and counts down once per 16-bit shift. | ||
int index = 3; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does index start at 3 here? Can you add a comment please? |
||
// (long)match > 0 holds while match is non-zero and bit 63 is clear, i.e. the lane | ||
// currently in the top position is not a match yet. | ||
while ((long)match > 0) | ||
{ | ||
match = match << 16; | ||
index--; | ||
} | ||
return index; | ||
} | ||
|
||
// Returns the index of the last occurrence of any specified character in the current instance. | ||
// The search starts at startIndex and runs backwards to startIndex - count + 1. | ||
// The character at position startIndex is included in the search. startIndex is the larger | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: add space between the `System.*` and `Internal.*` using directives