Error handling special characters using utf16Offset - swift

I have a function that searches for and returns the index within the string of the first occurrence of searchStr; however, I keep getting a fatal error whenever the string contains any special characters (such as ç or é). The error seems to be occurring at the utf16Offset call and I can't figure out why. Here is the code I'm using:
/// Returns the index at which `aString` first occurs in the receiver, searching
/// from the character offset `position` to the end of the receiver.
///
/// - Parameters:
///   - aString: The substring to look for (matched with the `.literal` option).
///   - position: Character offset from `startIndex` at which the search begins.
/// - Returns: The lower bound of the first match, or `nil` when `position` is `nil`,
///   when the bounds check below fails, or when there is no match.
func index(of aString: String, startingFrom position: Int? = 0) -> String.Index? {
guard let position = position else {
return nil
}
// NOTE(review): `startIndex` and `endIndex` belong to `self`, but their UTF-16
// offsets are computed `in: aString`, which is a different string.
if self.startIndex.utf16Offset(in: aString) + position > self.endIndex.utf16Offset(in: aString) {
return nil
} // produces fatal error when special character encountered
let start: String.Index = self.index(self.startIndex, offsetBy: position)
// The bounds are not validated here (`uncheckedBounds`); `start` must not exceed `endIndex`.
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
return self.range(of: aString, options: .literal, range: range, locale: nil)?.lowerBound
}

This part seems problematic to me
if self.startIndex.utf16Offset(in: aString) + position > self.endIndex.utf16Offset(in: aString) {
return nil
}
You're taking the start index on self and convert it to its UTF-16 offset in aString. self and aString are two unrelated strings though so this is probably undefined behavior (which might be why you see it crashing in some cases).
The intent of this if statement seems to be to ensure that this produces a valid range (lower <= upper)
let start: String.Index = self.index(self.startIndex, offsetBy: position)
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
You can actually do that by just comparing the Indexes directly like this
let start: String.Index = self.index(self.startIndex, offsetBy: position)
guard start < self.endIndex else {
return nil
}
// Range is guaranteed to have valid boundaries now
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
Full example:
extension String {
    /// Returns the index of the first literal occurrence of `aString` at or after
    /// a character offset.
    ///
    /// - Parameters:
    ///   - aString: The substring to search for.
    ///   - position: Number of characters to skip from the start of the receiver
    ///     before searching. Defaults to `0`.
    /// - Returns: The index where the first match begins, or `nil` when `position`
    ///   is `nil`, negative, at or beyond the end of the receiver, or when
    ///   `aString` does not occur in the searched range.
    func index(of aString: String, startingFrom position: Int? = 0) -> String.Index? {
        // `index(_:offsetBy:limitedBy:)` yields nil instead of trapping when
        // `position` overshoots the end; negative offsets are rejected up front
        // because the limit does not protect against walking before `startIndex`.
        guard let position = position,
              position >= 0,
              let start = index(startIndex, offsetBy: position, limitedBy: endIndex),
              start < endIndex
        else {
            return nil
        }
        // `start < endIndex` holds, so the half-open range is always valid.
        return range(of: aString, options: .literal, range: start..<endIndex, locale: nil)?.lowerBound
    }
}
// Doesn't crash anymore
let foobar = "aaç"
// The force unwrap is acceptable here only because "ç" is known to be present in the literal.
foobar.distance(from: foobar.startIndex, to: foobar.index(of: "ç", startingFrom: 0)!) // 2

Related

How to figure out the range of a substring in one string and then use it in another

Basically, I want something like this:
// Finds `input` in the ASCII digit string and returns the characters occupying
// the same NSRange (UTF-16 code-unit range) in the Chinese numeral string.
// Returns nil when `input` does not occur in the digit string.
NSString* foobar(NSString *input) {
    // say input is "1"
    NSString *string = @"0123456789";
    NSString *anotherString = @"零一二三四五六七八九";
    NSRange range = [string rangeOfString:input];
    // -substringWithRange: raises NSRangeException for an NSNotFound location.
    if (range.location == NSNotFound) {
        return nil;
    }
    // return "一" here
    return [anotherString substringWithRange:range];
}
I tried the same stuff in Swift,
/// Looks up `input` in the digit string and uses the resulting range to
/// subscript `anotherString`.
///
/// NOTE(review): `anotherString` is not declared in this snippet; presumably it
/// is the Chinese numeral string from the Objective-C version — confirm.
func foobar(input: String) -> String {
// say input is "1"
let string = "0123456789"
// The range (and its indices) is produced by `string`.
let range = string.range(of: input, options: .anchored)
// The range from `string` is force-unwrapped and applied to a different string here.
let result = anotherString[range!]
// return "012" here
return String(result)
}
why?
And how can I achieve this?
String (or generally, collection) indices must only be used with the collection that they were created with. In order to find the same positions in another string, the indices must be converted to (integer) offsets and back to indices of the target string:
/// Finds `input` in the digit string and returns the characters located at the
/// same character offsets in the numeral string.
///
/// - Parameter input: The text to look up in `"0123456789"`.
/// - Returns: The corresponding slice of the numeral string, or `nil` when
///   `input` is not found or the offsets do not fit inside the numeral string.
func foobar(input: String) -> String? {
    let digits = "0123456789"
    let numerals = "😀一二三四五六七八九"

    guard let match = digits.range(of: input) else {
        return nil
    }

    // Indices of `digits` are meaningless in `numerals`; translate them into
    // plain character counts first.
    let skipped = digits[..<match.lowerBound].count
    let matched = digits[match].count

    guard let lower = numerals.index(numerals.startIndex, offsetBy: skipped, limitedBy: numerals.endIndex) else {
        return nil
    }
    guard let upper = numerals.index(lower, offsetBy: matched, limitedBy: numerals.endIndex) else {
        return nil
    }
    return String(numerals[lower..<upper])
}
print(foobar(input: "1") as Any) // Optional("一")
print(foobar(input: "123") as Any) // Optional("一二三")
print(foobar(input: "124") as Any) // nil
Your Objective-C code works as long as all characters in the string consume a single UTF-16 code unit (because that is what NSRange counts). It will not work correctly with emojis, flags, and other characters which are represented as UTF-16 surrogate pairs, e.g. with
NSString *anotherString = @"😀一二三四五六七八九";
Another approach is converting strings to array of characters
/// Prints the character found at the position of `str` in the digit table,
/// followed by the character at the same position in the numeral table.
/// Prints "Not found" when `str` is not one of the digits.
func find(_ str: Character) {
    let digits = Array("0123456789")
    let numerals = Array("零一二三四五六七八九")

    if let position = digits.firstIndex(of: str) {
        print(digits[position]) // 2
        print(numerals[position]) // 二
    } else {
        print("Not found")
    }
}
find("2")

What does Index(encodedOffset:) do and how do I replace it?

The code I have used throws the following error:
'init(encodedOffset:)' is deprecated: encodedOffset has been deprecated as most common usage is incorrect. Use String.Index(utf16Offset:in:) to achieve the same behavior.
What does this actually do and how can I replace it? Is there an easier way to achieve this string formatting?
extension String {
/// Strips every non-digit from the receiver and then inserts the literal
/// characters of `pattern` at their positions.
///
/// - Parameters:
///   - pattern: Template whose characters are copied into the result, except
///     for occurrences of `replacmentCharacter`.
///   - replacmentCharacter: Placeholder character in `pattern` that is skipped.
/// - Returns: The digits with the pattern's literal characters inserted; returns
///   early once the loop offset reaches the current length of the result.
func applyPatternOnNumbers(pattern: String, replacmentCharacter: Character) -> String {
// Keep only the characters 0-9.
var pureNumber = self.replacingOccurrences( of: "[^0-9]", with: "", options: .regularExpression)
for index in 0 ..< pattern.count {
guard index < pureNumber.count else { return pureNumber }
// NOTE(review): this is the deprecated initializer the compiler message refers to;
// the same index value is then used with both `pattern` and `pureNumber`.
let stringIndex = String.Index(encodedOffset: index)
let patternCharacter = pattern[stringIndex]
guard patternCharacter != replacmentCharacter else { continue }
pureNumber.insert(patternCharacter, at: stringIndex)
}
return pureNumber
}
}
String.Index(encodedOffset:) converts an Int index to a String.Index index.
The suggested replacement is straightforward
let stringIndex = String.Index(utf16Offset: index, in: pattern)
But it's quite cumbersome to convert Int to String.Index forth and back.
This uses String.Index only
extension String {
    /// Strips every non-digit from the receiver and formats the remaining digits
    /// according to `pattern`.
    ///
    /// Each character of `pattern` that differs from `replacmentCharacter` is
    /// inserted into the digit string at the same character offset it has in
    /// `pattern`; placeholder characters leave the digit at that offset untouched.
    ///
    /// - Parameters:
    ///   - pattern: Template such as `"(###) ###-####"`.
    ///   - replacmentCharacter: Placeholder in `pattern` that stands for a digit.
    /// - Returns: The formatted digits. Formatting stops as soon as the pattern
    ///   offset reaches the end of the digits collected so far.
    func applyPatternOnNumbers(pattern: String, replacmentCharacter: Character) -> String {
        var pureNumber = replacingOccurrences(of: "[^0-9]", with: "", options: .regularExpression)
        for (offset, patternCharacter) in pattern.enumerated() {
            guard offset < pureNumber.count else { return pureNumber }
            guard patternCharacter != replacmentCharacter else { continue }
            // Indices of `pattern` must not be used on `pureNumber`: derive the
            // insertion point from `pureNumber` itself via the character offset.
            let insertionPoint = pureNumber.index(pureNumber.startIndex, offsetBy: offset)
            pureNumber.insert(patternCharacter, at: insertionPoint)
        }
        return pureNumber
    }
}

distance(from:to:)' is unavailable: Any String view index conversion can fail in Swift 4; please unwrap the optional indices

I was trying to migrate my app to Swift 4, Xcode 9. I get this error. Its coming from a 3rd party framework.
distance(from:to:)' is unavailable: Any String view index conversion can fail in Swift 4; please unwrap the optional indices
/// Converts a `Range<String.Index>` of the receiver into an `NSRange` expressed
/// in offsets of the receiver's UTF-16 view.
///
/// NOTE(review): this is the version that fails to compile; the compiler
/// messages are quoted on the two affected lines below.
func nsRange(from range: Range<String.Index>) -> NSRange {
let utf16view = self.utf16
// Both bounds are converted with `samePosition(in:)` and passed on without unwrapping.
let from = range.lowerBound.samePosition(in: utf16view)
let to = range.upperBound.samePosition(in: utf16view)
return NSMakeRange(utf16view.distance(from: utf16view.startIndex, to: from), // Error: distance(from:to:)' is unavailable: Any String view index conversion can fail in Swift 4; please unwrap the optional indices
utf16view.distance(from: from, to: to))// Error: distance(from:to:)' is unavailable: Any String view index conversion can fail in Swift 4; please unwrap the optional indices
}
You can simply unwrap the optional indices like this:
/// Converts a `Range<String.Index>` of the receiver into an `NSRange` measured
/// in UTF-16 code units.
///
/// - Parameter range: A range of indices of the receiver.
/// - Returns: The equivalent `NSRange`, or `nil` when either bound has no
///   matching position in the UTF-16 view.
func nsRange(from range: Range<String.Index>) -> NSRange? {
    let units = self.utf16
    guard let lower = range.lowerBound.samePosition(in: units),
          let upper = range.upperBound.samePosition(in: units)
    else {
        return nil
    }
    let location = units.distance(from: units.startIndex, to: lower)
    let length = units.distance(from: lower, to: upper)
    return NSMakeRange(location, length)
}
The error says that the converted indices you are passing to distance(from:to:) are optionals and need to be unwrapped. Try this:
/// Converts a `Range<String.Index>` of the receiver into an `NSRange` measured
/// in UTF-16 code units.
///
/// - Parameter range: A range of indices of the receiver.
/// - Returns: The equivalent `NSRange`, or `NSMakeRange(0, 0)` when either bound
///   has no matching position in the UTF-16 view.
func nsRange(from range: Range<String.Index>) -> NSRange {
    let utf16view = self.utf16
    // The index conversions are what can fail; `distance(from:to:)` itself
    // returns a plain `Int` once both indices are unwrapped.
    guard let from = range.lowerBound.samePosition(in: utf16view),
          let to = range.upperBound.samePosition(in: utf16view)
    else { return NSMakeRange(0, 0) }
    let location = utf16view.distance(from: utf16view.startIndex, to: from)
    let length = utf16view.distance(from: from, to: to)
    return NSMakeRange(location, length)
}
However the return could be handled better in the guard statement. I'd recommend making the return type of the function NSRange? and checking for nil wherever you call the function to avoid inaccurate values being returned.
Please check :
let dogString = "Dog‼🐶"
let range = dogString.range(of: "🐶")!
// This is using Range
let strRange = dogString.range(range: range)
print((dogString as NSString).substring(with: strRange!)) // 🐶
extension String {
    /// Converts a `Range<String.Index>` of the receiver into an `NSRange` measured
    /// in UTF-16 code units.
    ///
    /// - Parameter range: A range of indices of the receiver.
    /// - Returns: The equivalent `NSRange`, or `nil` when either bound has no
    ///   matching position in the UTF-16 view.
    func range(range: Range<String.Index>) -> NSRange? {
        guard
            let from = range.lowerBound.samePosition(in: utf16),
            let to = range.upperBound.samePosition(in: utf16)
        else { return nil }
        // Measure in the UTF-16 view rather than reading `encodedOffset`, which is
        // deprecated and is not guaranteed to be a UTF-16 offset.
        let location = utf16.distance(from: utf16.startIndex, to: from)
        let length = utf16.distance(from: from, to: to)
        return NSRange(location: location, length: length)
    }
}
// This is using NSRange
let strNSRange = dogString.range(nsRange: NSRange(range, in: dogString))
print((dogString as NSString).substring(with: strNSRange!)) // 🐶
extension String {
    /// Validates an `NSRange` against the receiver.
    ///
    /// - Parameter nsRange: A range expressed in UTF-16 code units.
    /// - Returns: `nsRange` unchanged when it lies inside the receiver and both of
    ///   its ends fall on character boundaries; otherwise `nil`.
    func range(nsRange: NSRange) -> NSRange? {
        let unitCount = utf16.count
        // Plain integer checks first: they also reject `NSNotFound` and negative
        // values without risking a trap or overflow in index arithmetic.
        guard nsRange.location >= 0,
              nsRange.length >= 0,
              nsRange.location <= unitCount,
              nsRange.length <= unitCount - nsRange.location
        else { return nil }

        let from16 = utf16.index(utf16.startIndex, offsetBy: nsRange.location)
        // The end is `length` units after the start, not after the string's beginning.
        let to16 = utf16.index(from16, offsetBy: nsRange.length)

        // Reject ranges that would split a character (e.g. half a surrogate pair).
        guard from16.samePosition(in: self) != nil,
              to16.samePosition(in: self) != nil
        else { return nil }
        return nsRange
    }
}

range function and crash in swift 3

My below code crashes:
/// Converts a UTF-16 offset and length into a `Range<String.Index>` of the receiver.
///
/// - Parameters:
///   - from: Offset of the range start, counted in the UTF-16 view.
///   - length: Number of UTF-16 code units the range should span; the upper
///     bound falls back to the end of the string when it would overshoot.
/// - Returns: The range, or `nil` when `from` lies past the end or exactly at the
///   end of the string, or when a bound cannot be converted to a `String.Index`.
func getrange(_ from: Int, length: Int) -> Range<String.Index>? {
guard let fromU16 = utf16.index(utf16.startIndex, offsetBy: from, limitedBy: utf16.endIndex), fromU16 != utf16.endIndex else {
return nil ----->crashes here
}
// Fall back to the end of the string when `length` overshoots.
let toU16 = utf16.index(fromU16, offsetBy: length, limitedBy: utf16.endIndex) ?? utf16.endIndex
guard let from = String.Index(fromU16, within: self),
let to = String.Index(toU16, within: self) else { return nil }
return from ..< to
}
This code is crashing with swift 3 migration.
Can someone help debugging the issue.
Below is the sequence of events:
//input for below function is: text “123456789”, string “0”, nsrange = location =9, length=0
1) function 1
/// Replaces the characters of `text` in `nsrange` with `string` and passes the
/// result through `numericString()`.
///
/// - Parameters:
///   - text: The current text.
///   - string: The replacement text.
///   - nsrange: The range of `text` to replace.
/// - Returns: The `numericString()` of the edited text, or of the unmodified
///   `text` when `nsrange` cannot be converted by `range(for:)`.
///
/// NOTE(review): `numericString()` is defined elsewhere; presumably it keeps
/// only numeric characters — confirm.
static func numericText(_ text: String, replacedBy string: String, in nsrange: NSRange) -> String {
guard let range = text.range(for: nsrange) else {
//assertionFailure("Should never reach here")
return text.numericString()
}
// Apply Replacement String to the textField text and extract only the numeric values
return text.replacingCharacters(in: range, with: string)
.numericString()
}
2) function 2
/// Converts an `NSRange` into a `Range<String.Index>` by forwarding its location
/// and length to `range(_:length:)`.
///
/// - Parameter nsrange: The range to convert.
/// - Returns: Whatever `range(_:length:)` returns for that location and length.
func range(for nsrange: NSRange) -> Range<String.Index>? {
return range(nsrange.location, length: nsrange.length)
}
3) function 3
/// Converts a UTF-16 offset and length into a `Range<String.Index>` of the receiver.
///
/// - Parameters:
///   - from: Offset of the range start, counted in the UTF-16 view.
///   - length: Number of UTF-16 code units the range should span; the upper
///     bound falls back to the end of the string when it would overshoot.
/// - Returns: The range, or `nil` when `from` lies past the end or exactly at the
///   end of the string, or when a bound cannot be converted to a `String.Index`.
func range(_ from: Int, length: Int) -> Range<String.Index>? {
// A start position equal to `endIndex` is rejected as well.
guard let fromU16 = utf16.index(utf16.startIndex, offsetBy: from, limitedBy: utf16.endIndex), fromU16 != utf16.endIndex else {
return nil
}
// Fall back to the end of the string when `length` overshoots.
let toU16 = utf16.index(fromU16, offsetBy: length, limitedBy: utf16.endIndex) ?? utf16.endIndex
guard let from = String.Index(fromU16, within: self),
let to = String.Index(toU16, within: self) else { return nil }
return from ..< to
}
Sorry, I didn't update during the weekend.
I reviewed your question.
I can implement your function 1:
extension String {
    /// Converts a UTF-16 offset and length into a `Range<String.Index>` of the receiver.
    ///
    /// - Parameters:
    ///   - from: Offset of the range start, counted in UTF-16 code units.
    ///   - length: Number of UTF-16 code units to span. A length that runs past
    ///     the end of the string is clamped to the end.
    /// - Returns: The range, or `nil` when `from` or `length` is negative, when
    ///   `from` is at or beyond the end of the string, or when either bound does
    ///   not fall on a character boundary.
    func getrange(_ from: Int, length: Int) -> Range<String.Index>? {
        let unitCount = utf16.count
        // Negative values would walk before `startIndex` (a trap) or produce a
        // reversed range, so they are rejected together with out-of-range starts.
        guard from >= 0, length >= 0, from < unitCount else {
            return nil
        }
        // Clamp with integer arithmetic so a huge `length` cannot overflow.
        // `Swift.min` is spelled out because `String` has its own `min()` member.
        let upperOffset = from + Swift.min(length, unitCount - from)
        let fromU16 = utf16.index(utf16.startIndex, offsetBy: from)
        let toU16 = utf16.index(utf16.startIndex, offsetBy: upperOffset)
        guard let lower = String.Index(fromU16, within: self),
              let upper = String.Index(toU16, within: self)
        else { return nil }
        return lower ..< upper
    }
}
But I can't implement your function 2. Has it been converted to Swift 3 syntax yet?
My question is this,
Below is the sequence of events:
//input for below function is: text “123456789”, string “0”, nsrange = location =9, length=0
In your input, the location shouldn't be 9. As your string length is 9, shouldn't the maximum location you can replace be 8?
Just replacing utf16 with unicodeScalars in the code fixed the issue.

Swift3 & Xcode8: ’subscript' is unavailable: cannot subscript String with a CountableClosedRange<Int>,

Error 1:
When I am trying get the stringValue from Metadata shows above error in Swift3:
let myMetadata: AVMetadataMachineReadableCodeObject = metadataObjects[0] as! AVMetadataMachineReadableCodeObject
// take out the system and check-digits
let myBarcode = myMetadata.stringValue[1...11] //error
Error 2:
In extensions of String I write these to get right(x) and left(x) function to get substring:
extension String {
/// Number of characters in the string.
var length: Int {
return self.characters.count
}
// right(x) and left(x) functions to get a substring
/// Intended to return the last `i` characters of the string.
/// NOTE(review): subscripts the string with a range of `Int`, which is the
/// compiler error this question is about.
func right(_ i: Int) -> String?
{
return self[self.length-i ... self.length-1 ] //error
}
/// Intended to return the first `i` characters of the string.
/// NOTE(review): subscripts the string with a range of `Int`, which is the
/// compiler error this question is about.
func left(_ i: Int) -> String?
{
return self[0 ... i-1] //error
}
}
Use this extension for the countable closed range [0...4] subscripting
extension String {
    /// Returns the characters at the given closed range of character offsets.
    ///
    /// This variant performs no bounds checking: offsets outside the string trap
    /// at runtime.
    ///
    /// - Parameter r: Closed range of character offsets, e.g. `0...4`.
    subscript (r: CountableClosedRange<Int>) -> String {
        get {
            let lower = index(startIndex, offsetBy: r.lowerBound)
            let upper = index(lower, offsetBy: r.upperBound - r.lowerBound)
            // Slicing yields a `Substring`; convert it to match the declared `String`.
            return String(self[lower...upper])
        }
    }
}
or a safer version which checks the bounds and returns nil rather than an out-of-range exception:
extension String {
    /// Returns the characters at the given closed range of character offsets, or
    /// `nil` when the range does not lie entirely inside the string.
    ///
    /// - Parameter r: Closed range of character offsets, e.g. `0...4`.
    subscript (r: CountableClosedRange<Int>) -> String? {
        get {
            guard r.lowerBound >= 0,
                  let lower = index(startIndex, offsetBy: r.lowerBound, limitedBy: endIndex),
                  let upper = index(lower, offsetBy: r.upperBound - r.lowerBound, limitedBy: endIndex),
                  // `limitedBy:` still admits `endIndex`, but a closed range needs an
                  // upper bound that refers to an actual character.
                  upper < endIndex
            else { return nil }
            // Slicing yields a `Substring`; convert it to match the declared type.
            return String(self[lower...upper])
        }
    }
}
Swift 4 change: You need to create a new string from the result
return String(self[startIndex...endIndex])
I took inspiration from @vadian's answer and created a set of (Swift 4) extensions that make pulling substrings trivially easy. These do not bounds check, which is generally my preference since I shouldn't be deferring sanity checking to lower level utilities like these.
extension String {
    /// Translates a character offset into an index of the receiver.
    /// No bounds checking is performed; an invalid offset traps.
    private func position(atOffset offset: Int) -> Index {
        return index(startIndex, offsetBy: offset)
    }

    /// The single character at the given offset, as a `String`.
    subscript (_ offset: Int) -> String {
        let target = position(atOffset: offset)
        return String(self[target])
    }

    /// The characters in the half-open offset range, e.g. `s[1..<3]`.
    subscript (_ range: CountableRange<Int>) -> String {
        let from = position(atOffset: range.lowerBound)
        let to = position(atOffset: range.upperBound)
        return String(self[from..<to])
    }

    /// The characters in the closed offset range, e.g. `s[1...3]`.
    subscript (_ range: CountableClosedRange<Int>) -> String {
        let from = position(atOffset: range.lowerBound)
        let through = position(atOffset: range.upperBound)
        return String(self[from...through])
    }

    /// The characters from the given offset to the end, e.g. `s[2...]`.
    subscript (_ range: CountablePartialRangeFrom<Int>) -> String {
        let from = position(atOffset: range.lowerBound)
        return String(self[from...])
    }

    /// The characters before the given offset, e.g. `s[..<2]`.
    subscript (_ range: PartialRangeUpTo<Int>) -> String {
        let to = position(atOffset: range.upperBound)
        return String(self[..<to])
    }

    /// The characters up to and including the given offset, e.g. `s[...2]`.
    subscript (_ range: PartialRangeThrough<Int>) -> String {
        let through = position(atOffset: range.upperBound)
        return String(self[...through])
    }
}