Swift extract string between strings using regex

Swift extract string between strings using regex - swift

I found this code online
let sample = "#This is an [hello] amazing [world]"
let regex = try? NSRegularExpression(pattern: "#(.*?)\\[.+?\\]", options: [])
let matches = regex!.matches(in: sample, options: [], range: NSMakeRange(0, sample.count))
for match in matches {
let r = (sample as NSString).substring(with: match.range)
print(r)
}
Current output
#This is an [hello]
expected output
hello
How can I extract only the needed text?
NOTE
In my case there could be multiple square brackets. See "[anotherWorld] #This is an [hello] amazing [world]". I need the text between the square brackets which is immediately next to #somerandomtext. In given sample I need hello only

You have to use a capture group to get the text between specific characters.
The pattern
"#[^\[]+\[([^\]]+)\]"
searches for a # then for one or more characters which are not [ ([^\[]+)then a [ (\[) then it captures () one or more characters which are not ] ([^\]]+) and finally it searches for a ] (\]).
let sample = "#This is an [hello] amazing [world]"
let regex = try! NSRegularExpression(pattern: #"#[^\[]+\[([^\]]+)\]"#)
if let match = regex.firstMatch(in: sample, range: NSRange(sample.startIndex..., in: sample)) {
let range = Range(match.range(at: 1), in: sample)!
let result = String(sample[range])
print(result)
}
Your way to create an NSRange and the bridge to NSString are bad practices. There are dedicated APIs to handle NSRange

If there should not be a # or [ or ] after matching the # and the wanted match from [...], you could make the pattern a bit more restrictive extending the negated character class and use a capturing group:
#[^#\]\[]+\[([^\]\[]+)\]
In parts
# Match a # char
[^#\]\[]+ Negated character class, match 1+ times any char except # [ ]
\[ Match [
( Capture group 1
[^\]\[]+ match 1+ times any char except [ ]
) Close group
\] Match ]
Regex demo

The regex approach it is already answered so I will post a native Swift approach:
let string = "[anotherWorld] #This is an [hello] amazing [world]"
if let hashTag = string.firstIndex(of: "#"),
let openBracket = string[hashTag...].firstIndex(of: "["),
let closedBracket = string[openBracket...].firstIndex(of: "]") {
let substring = string[string.index(after: openBracket)..<closedBracket]
let result = String(substring)
print(result) // "hello\n"
}
String manipulation in Swift is not very Swifty but you can extend Collection and create some SubSequence methods to make your life easier:
extension Collection where Element: Equatable {
func firstIndex(after element: Element) -> Index? {
guard let index = firstIndex(of: element) else { return nil }
return self.index(after: index)
}
func subSequence(from element: Element) -> SubSequence? {
guard let index = firstIndex(of: element) else { return nil }
return self[index...]
}
func subSequence(after element: Element) -> SubSequence? {
guard let index = firstIndex(after: element) else { return nil }
return self[index...]
}
func subSequence(upTo element: Element) -> SubSequence? {
guard let index = firstIndex(after: element) else { return nil }
return self[..<index]
}
func subSequence(upThrough element: Element) -> SubSequence? {
guard let index = firstIndex(of: element) else { return nil }
return self[...index]
}
func subSequence(from element: Element, upTo: Element) -> SubSequence? {
guard
let lower = firstIndex(of: element),
let upper = self[lower...].firstIndex(of: upTo)
else { return nil }
return self[lower..<upper]
}
func subSequence(from element: Element, upThrough: Element) -> SubSequence? {
guard
let lower = firstIndex(of: element),
let upper = self[lower...].firstIndex(of: upThrough)
else { return nil }
return self[lower...upper]
}
func subSequence(after element: Element, upTo: Element) -> SubSequence? {
guard
let lower = firstIndex(after: element),
let upper = self[lower...].firstIndex(of: upTo)
else { return nil }
return self[lower..<upper]
}
func subSequence(after element: Element, upThrough: Element) -> SubSequence? {
guard
let lower = firstIndex(after: element),
let upper = self[lower...].firstIndex(of: upThrough)
else { return nil }
return self[lower...upper]
}
}
Now you can get any subsequence between two elements (in the String case a Substring between two characters):
let string = "[anotherWorld] #This is an [hello] amazing [world]"
let subString = string.subSequence(after: "#")?.subSequence(after: "[", upTo: "]") // "hello"

Related

Swift 4 How to find Index of a particular substring in String [duplicate]

I'm used to do this in JavaScript:
var domains = "abcde".substring(0, "abcde".indexOf("cd")) // Returns "ab"
Swift doesn't have this function, how to do something similar?

edit/update:
Xcode 11.4 • Swift 5.2 or later
import Foundation
extension StringProtocol {
func index<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> Index? {
range(of: string, options: options)?.lowerBound
}
func endIndex<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> Index? {
range(of: string, options: options)?.upperBound
}
func indices<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Index] {
ranges(of: string, options: options).map(\.lowerBound)
}
func ranges<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Range<Index>] {
var result: [Range<Index>] = []
var startIndex = self.startIndex
while startIndex < endIndex,
let range = self[startIndex...]
.range(of: string, options: options) {
result.append(range)
startIndex = range.lowerBound < range.upperBound ? range.upperBound :
index(range.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
}
return result
}
}
usage:
let str = "abcde"
if let index = str.index(of: "cd") {
let substring = str[..<index] // ab
let string = String(substring)
print(string) // "ab\n"
}
let str = "Hello, playground, playground, playground"
str.index(of: "play") // 7
str.endIndex(of: "play") // 11
str.indices(of: "play") // [7, 19, 31]
str.ranges(of: "play") // [{lowerBound 7, upperBound 11}, {lowerBound 19, upperBound 23}, {lowerBound 31, upperBound 35}]
case insensitive sample
let query = "Play"
let ranges = str.ranges(of: query, options: .caseInsensitive)
let matches = ranges.map { str[$0] } //
print(matches) // ["play", "play", "play"]
regular expression sample
let query = "play"
let escapedQuery = NSRegularExpression.escapedPattern(for: query)
let pattern = "\\b\(escapedQuery)\\w+" // matches any word that starts with "play" prefix
let ranges = str.ranges(of: pattern, options: .regularExpression)
let matches = ranges.map { str[$0] }
print(matches) // ["playground", "playground", "playground"]

Using String[Range<String.Index>] subscript you can get the sub string. You need starting index and last index to create the range and you can do it as below
let str = "abcde"
if let range = str.range(of: "cd") {
let substring = str[..<range.lowerBound] // or str[str.startIndex..<range.lowerBound]
print(substring) // Prints ab
}
else {
print("String not present")
}
If you don't define the start index this operator ..< , it take the starting index. You can also use str[str.startIndex..<range.lowerBound] instead of str[..<range.lowerBound]

Swift 5
Find index of substring
let str = "abcdecd"
if let range: Range<String.Index> = str.range(of: "cd") {
let index: Int = str.distance(from: str.startIndex, to: range.lowerBound)
print("index: ", index) //index: 2
}
else {
print("substring not found")
}
Find index of Character
let str = "abcdecd"
if let firstIndex = str.firstIndex(of: "c") {
let index = str.distance(from: str.startIndex, to: firstIndex)
print("index: ", index) //index: 2
}
else {
print("symbol not found")
}

In Swift 4 :
Getting Index of a character in a string :
let str = "abcdefghabcd"
if let index = str.index(of: "b") {
print(index) // Index(_compoundOffset: 4, _cache: Swift.String.Index._Cache.character(1))
}
Creating SubString (prefix and suffix) from String using Swift 4:
let str : String = "ilike"
for i in 0...str.count {
let index = str.index(str.startIndex, offsetBy: i) // String.Index
let prefix = str[..<index] // String.SubSequence
let suffix = str[index...] // String.SubSequence
print("prefix \(prefix), suffix : \(suffix)")
}
Output
prefix , suffix : ilike
prefix i, suffix : like
prefix il, suffix : ike
prefix ili, suffix : ke
prefix ilik, suffix : e
prefix ilike, suffix :
If you want to generate a substring between 2 indices , use :
let substring1 = string[startIndex...endIndex] // including endIndex
let subString2 = string[startIndex..<endIndex] // excluding endIndex

Doing this in Swift is possible but it takes more lines, here is a function indexOf() doing what is expected:
func indexOf(source: String, substring: String) -> Int? {
let maxIndex = source.characters.count - substring.characters.count
for index in 0...maxIndex {
let rangeSubstring = source.startIndex.advancedBy(index)..<source.startIndex.advancedBy(index + substring.characters.count)
if source.substringWithRange(rangeSubstring) == substring {
return index
}
}
return nil
}
var str = "abcde"
if let indexOfCD = indexOf(str, substring: "cd") {
let distance = str.startIndex.advancedBy(indexOfCD)
print(str.substringToIndex(distance)) // Returns "ab"
}
This function is not optimized but it does the job for short strings.

There are three closely connected issues here:
All the substring-finding methods are over in the Cocoa NSString world (Foundation)
Foundation NSRange has a mismatch with Swift Range; the former uses start and length, the latter uses endpoints
In general, Swift characters are indexed using String.Index, not Int, but Foundation characters are indexed using Int, and there is no simple direct translation between them (because Foundation and Swift have different ideas of what constitutes a character)
Given all that, let's think about how to write:
func substring(of s: String, from:Int, toSubstring s2 : String) -> Substring? {
// ?
}
The substring s2 must be sought in s using a String Foundation method. The resulting range comes back to us, not as an NSRange (even though this is a Foundation method), but as a Range of String.Index (wrapped in an Optional, in case we didn't find the substring at all). However, the other number, from, is an Int. Thus we cannot form any kind of range involving them both.
But we don't have to! All we have to do is slice off the end of our original string using a method that takes a String.Index, and slice off the start of our original string using a method that takes an Int. Fortunately, such methods exist! Like this:
func substring(of s: String, from:Int, toSubstring s2 : String) -> Substring? {
guard let r = s.range(of:s2) else {return nil}
var s = s.prefix(upTo:r.lowerBound)
s = s.dropFirst(from)
return s
}
Or, if you prefer to be able to apply this method directly to a string, like this...
let output = "abcde".substring(from:0, toSubstring:"cd")
...then make it an extension on String:
extension String {
func substring(from:Int, toSubstring s2 : String) -> Substring? {
guard let r = self.range(of:s2) else {return nil}
var s = self.prefix(upTo:r.lowerBound)
s = s.dropFirst(from)
return s
}
}

Swift 5
let alphabet = "abcdefghijklmnopqrstuvwxyz"
var index: Int = 0
if let range: Range<String.Index> = alphabet.range(of: "c") {
index = alphabet.distance(from: alphabet.startIndex, to: range.lowerBound)
print("index: ", index) //index: 2
}

Swift 5
extension String {
enum SearchDirection {
case first, last
}
func characterIndex(of character: Character, direction: String.SearchDirection) -> Int? {
let fn = direction == .first ? firstIndex : lastIndex
if let stringIndex: String.Index = fn(character) {
let index: Int = distance(from: startIndex, to: stringIndex)
return index
} else {
return nil
}
}
}
tests:
func testFirstIndex() {
let res = ".".characterIndex(of: ".", direction: .first)
XCTAssert(res == 0)
}
func testFirstIndex1() {
let res = "12345678900.".characterIndex(of: "0", direction: .first)
XCTAssert(res == 9)
}
func testFirstIndex2() {
let res = ".".characterIndex(of: ".", direction: .last)
XCTAssert(res == 0)
}
func testFirstIndex3() {
let res = "12345678900.".characterIndex(of: "0", direction: .last)
XCTAssert(res == 10)
}

In the Swift version 3, String doesn't have functions like -
str.index(of: String)
If the index is required for a substring, one of the ways to is to get the range. We have the following functions in the string which returns range -
str.range(of: <String>)
str.rangeOfCharacter(from: <CharacterSet>)
str.range(of: <String>, options: <String.CompareOptions>, range: <Range<String.Index>?>, locale: <Locale?>)
For example to find the indexes of first occurrence of play in str
var str = "play play play"
var range = str.range(of: "play")
range?.lowerBound //Result : 0
range?.upperBound //Result : 4
Note : range is an optional. If it is not able to find the String it will make it nil. For example
var str = "play play play"
var range = str.range(of: "zoo") //Result : nil
range?.lowerBound //Result : nil
range?.upperBound //Result : nil

Leo Dabus's answer is great. Here is my answer based on his answer using compactMap to avoid Index out of range error.
Swift 5.1
extension StringProtocol {
func ranges(of targetString: Self, options: String.CompareOptions = [], locale: Locale? = nil) -> [Range<String.Index>] {
let result: [Range<String.Index>] = self.indices.compactMap { startIndex in
let targetStringEndIndex = index(startIndex, offsetBy: targetString.count, limitedBy: endIndex) ?? endIndex
return range(of: targetString, options: options, range: startIndex..<targetStringEndIndex, locale: locale)
}
return result
}
}
// Usage
let str = "Hello, playground, playground, playground"
let ranges = str.ranges(of: "play")
ranges.forEach {
print("[\($0.lowerBound.utf16Offset(in: str)), \($0.upperBound.utf16Offset(in: str))]")
}
// result - [7, 11], [19, 23], [31, 35]

Have you considered using NSRange?
if let range = mainString.range(of: mySubString) {
//...
}

How to split a string at the last occurence of a sequence

Target: A string with a built-in separator shall be split in an int and another string. In the case that the separator sequence '###' occurs more than once, the string shall always be spliced at the last '###'.
Is there an operator like string.lastIndexOf("###"), like in C#?
This is how my parser looks like:
func parseTuple(from string: String) -> (String, Int)? {
let parsedString = string.components(separatedBy: "###")
if let tupleString = String(parsedString[0]), let tupleInt = Int(parsedString[1]) {
return (tupleString, tupleInt)
} else {
return nil
}
}

The range(of:...) method of String has a .backwards option
to find the last occurrence of a string.
Then substring(to:) and substring(from:) can be used with the
lower/upper bound of that range to extract the parts of the string
preceding/following the separator:
func parseTuple(from string: String) -> (String, Int)? {
if let theRange = string.range(of: "###", options: .backwards),
let i = Int(string.substring(from: theRange.upperBound)) {
return (string.substring(to: theRange.lowerBound), i)
} else {
return nil
}
}
Example:
if let tuple = parseTuple(from: "Connect###Four###Player###7") {
print(tuple)
// ("Connect###Four###Player", 7)
}
Swift 4 update:
func parseTuple(from string: String) -> (String, Int)? {
if let theRange = string.range(of: "###", options: .backwards),
let i = Int(string[theRange.upperBound...]) {
return (String(string[...theRange.lowerBound]), i)
} else {
return nil
}
}

let input = "Connect###Four###Player###7"
let seperator = "###"
// split components
var components = input.components(separatedBy: seperator)
// remove the last component ...
components = Array(components.dropLast())
// ... and re-join the remaining ones
let output = components.joined(separator: seperator)
print(output)
prints:
Connect###Four###Player

Split string by components and keep components in place

Unlike string.components(separatedBy: ...) I want to keep the separators in place in the resulting array. Code is more explanatory
let input = "foo&bar|hello"
let output = string.tokenize(splitMarks: ["&", "|"])
let desiredResult = ["foo", "&", "bar", "|", "hello"]
Is there any function in the standard library which does this? If not how can I implement such a function?

For that you need to loop through the String and check its each characters that is it tokens or not. You can make extension of String for that like this.
extension String {
func stringTokens(splitMarks: Set<String>) -> [String] {
var string = ""
var desiredOutput = [String]()
for ch in self.characters {
if splitMarks.contains(String(ch)) {
if !string.isEmpty {
desiredOutput.append(string)
}
desiredOutput.append(String(ch))
string = ""
}
else {
string += String(ch)
}
}
if !string.isEmpty {
desiredOutput.append(string)
}
return desiredOutput
}
}
Now you can call this function like this way.
let input = "foo&bar|hello"
print(input.stringTokens(splitMarks: ["&", "|"]))
Output
["foo", "&", "bar", "|", "hello"]

You can use rangeOfCharacter(from: CharacterSet, ...) in a loop to
find the next occurrence of a split mark in the string, and then
append both the preceding part and the separator to an array:
extension String {
func tokenize(splitMarks: String) -> [Substring] {
let cs = CharacterSet(charactersIn: splitMarks)
var result = [Substring]()
var pos = startIndex
while let range = rangeOfCharacter(from: cs, range: pos..<endIndex) {
// Append string preceding the split mark:
if range.lowerBound != pos {
result.append(self[pos..<range.lowerBound])
}
// Append split mark:
result.append(self[range])
// Update position for next search:
pos = range.upperBound
}
// Append string following the last split mark:
if pos != endIndex {
result.append(self[pos..<endIndex])
}
return result
}
}
Example:
let input = "foo&bar|hello"
let output = input.tokenize(splitMarks: "&|")
print(output)
// ["foo", "&", "bar", "|", "hello"]

Find the Range of the Nth word in a String

What I want is something like
"word1 word2 word3".rangeOfWord(2) => 6 to 10
The result could come as a Range or a tuple or whatever.
I'd rather not do the brute force of iterating over the characters and using a state machine. Why reinvent the lexer? Is there a better way?

In your example, your words are unique, and you can use the following method:
let myString = "word1 word2 word3"
let wordNum = 2
let myRange = myString.rangeOfString(myString.componentsSeparatedByString(" ")[wordNum-1])
// 6..<11
As pointed out by Andrew Duncan in the comments below, the above is only valid if your words are unique. If you have non-unique words, you can use this somewhat less neater method:
let myString = "word1 word2 word3 word2 word1 word3 word1"
let wordNum = 7 // 2nd instance (out of 3) of "word1"
let arr = myString.componentsSeparatedByString(" ")
var fromIndex = arr[0..<wordNum-1].map { $0.characters.count }.reduce(0, combine: +) + wordNum - 1
let myRange = Range<String.Index>(start: myString.startIndex.advancedBy(fromIndex), end: myString.startIndex.advancedBy(fromIndex+arr[wordNum-1].characters.count))
let myWord = myString.substringWithRange(myRange)
// string "word1" (from range 36..<41)
Finally, lets use the latter to construct an extension of String as you have wished for in your question example:
extension String {
private func rangeOfNthWord(wordNum: Int, wordSeparator: String) -> Range<String.Index>? {
let arr = myString.componentsSeparatedByString(wordSeparator)
if arr.count < wordNum {
return nil
}
else {
let fromIndex = arr[0..<wordNum-1].map { $0.characters.count }.reduce(0, combine: +) + (wordNum - 1)*wordSeparator.characters.count
return Range<String.Index>(start: myString.startIndex.advancedBy(fromIndex), end: myString.startIndex.advancedBy(fromIndex+arr[wordNum-1].characters.count))
}
}
}
let myString = "word1 word2 word3 word2 word1 word3 word1"
let wordNum = 7 // 2nd instance (out of 3) of "word1"
if let myRange = myString.rangeOfNthWord(wordNum, wordSeparator: " ") {
// myRange: 36..<41
print(myString.substringWithRange(myRange)) // prints "word1"
}
You can tweak the .rangeOfNthWord(...) method if word separation is not unique (say some words are separated by two blankspaces " ").
Also pointed out in the comments below, the use of .rangeOfString(...) is not, per se, pure Swift. It is, however, by no means bad practice. From Swift Language Guide - Strings and Characters:
Swift’s String type is bridged with Foundation’s NSString class. If
you are working with the Foundation framework in Cocoa, the entire
NSString API is available to call on any String value you create when
type cast to NSString, as described in AnyObject. You can also use a
String value with any API that requires an NSString instance.
See also the NSString class reference for rangeOfString method:
// Swift Declaration:
func rangeOfString(_ searchString: String) -> NSRange

I went ahead and wrote the state machine. (Grumble..) FWIW, here it is:
extension String {
private func halfOpenIntervalOfBlock(n:Int, separator sep:Character? = nil) -> (Int, Int)? {
enum State {
case InSeparator
case InPrecedingSeparator
case InWord
case InTarget
case Done
}
guard n > 0 else {
return nil
}
var state:State
if n == 1 {
state = .InPrecedingSeparator
} else {
state = .InSeparator
}
var separatorNum = 0
var startIndex:Int = 0
var endIndex:Int = 0
for (i, c) in self.characters.enumerate() {
let inSeparator:Bool
// A bit inefficient to keep doing this test.
if let s = sep {
inSeparator = c == s
} else {
inSeparator = c == " " || c == "\n"
}
endIndex = i
switch state {
case .InPrecedingSeparator:
if !inSeparator {
state = .InTarget
startIndex = i
}
case .InTarget:
if inSeparator {
state = .Done
}
case .InWord:
if inSeparator {
separatorNum += 1
if separatorNum == n - 1 {
state = .InPrecedingSeparator
} else {
state = .InSeparator
}
}
case .InSeparator:
if !inSeparator {
state = .InWord
}
case .Done:
break
}
if state == .Done {
break
}
}
if state == .Done {
return (startIndex, endIndex)
} else if state == .InTarget {
return (startIndex, endIndex + 1) // We ran off end.
} else {
return nil
}
}
func rangeOfWord(n:Int) -> Range<Index>? {
guard let (s, e) = self.halfOpenIntervalOfBlock(n) else {
return nil
}
let ss = self.startIndex.advancedBy(s)
let ee = self.startIndex.advancedBy(e)
return Range(start:ss, end:ee)
}
}

It's not really clear whether the string has to be considered divided in words by separators it may contains, or if you're just looking for a specific substring occurrence.
Anyway both cases could be addressed in this way in my opinion:
extension String {
func enumerateOccurencies(of pattern: String, _ body: (Range<String.Index>, inout Bool) throws -> Void) rethrows {
guard
!pattern.isEmpty,
count >= pattern.count
else { return }
var stop = false
var lo = startIndex
while !stop && lo < endIndex {
guard
let r = self[lo..<endIndex].range(of: pattern)
else { break }
try body(r, &stop)
lo = r.upperBound
}
}
}
You'll then set stop to true in the body closure once reached the desired occurrence number and capture the range passed to it:
let words = "word1, word1, word2, word3, word1, word3"
var matches = 0
var rangeOfThirdOccurencyOfWord1: Range<String.Index>? = nil
words.enumerateOccurencies(of: "word1") { range, stop in
matches +=1
stop = matches == 3
if stop {
rangeOfThirdOccurencyOfWord1 = range
}
}
Regarding the DFA: recently I've wrote one leveraging on Hashable and using a an Array of Dictionaries as its state nodes, but I've found that the method above is faster, cause maybe range(of:) uses finger-printing.
UPDATE
Otherwise you could also achieve that API you've mentioned in this way:
import Foundation
extension String {
func rangeOfWord(order: Int, separator: String) -> Range<String.Index>? {
precondition(order > 0)
guard
!isEmpty,
!separator.isEmpty,
separator.count < count
else { return nil }
var wordsSoFar = 0
var lo = startIndex
while let r = self[lo..<endIndex].range(of: separator) {
guard
r.lowerBound != lo
else {
lo = r.upperBound
continue
}
wordsSoFar += 1
guard
wordsSoFar < order
else { return lo..<r.lowerBound }
lo = r.upperBound
}
if
lo < endIndex,
wordsSoFar + 1 == order
{
return lo..<endIndex
}
return nil
}
}
let words = "word anotherWord oneMore lastOne"
if let r = words.rangeOfWord(order: 4, separator: " ") {
print(words[r])
} else {
print("not found")
}
Here order parameter refers to the nth order of the word in the string, starting from 1. I've also added the separator parameter to specify a string token to use for finding words in the string (it can also be defaulted to " " to be able to call the function without having to specify it).

Here's my attempt at an updated answer in Swift 5.5:
import Foundation
extension String {
func rangeOfWord(atPosition wordAt: Int) -> Range<String.Index>? {
let fullrange = self.startIndex..<self.endIndex
var count = 0
var foundAt: Range<String.Index>? = nil
self.enumerateSubstrings(in: fullrange, options: .byWords) { _, substringRange, _, stop in
count += 1
if count == wordAt {
foundAt = substringRange
stop = true // Stop the enumeration after the word range is found.
}
}
return foundAt
}
}
let lorem = "Morbi leo risus, porta ac consectetur ac, vestibulum at eros."
if let found = lorem.rangeOfWord(atPosition: 8) {
print("found: \(lorem[found])")
} else {
print("not found.")
}
This solution doesn't make a new array to contain the words so uses less memory (I have not tested but in theory it should use less memory). As much as possible, the build in method is used therefore less chance of bugs.

Swift 5 solution, which allows you to specify the word separator
extension String {
func rangeOfWord(atIndex wordIndex: Int) -> Range<String.Index>? {
let wordComponents = self.components(separatedBy: " ")
guard wordIndex < wordComponents.count else {
return nil
}
let characterEndCount = wordComponents[0...wordIndex].map { $0.count }.reduce(0, +)
let start = String.Index(utf16Offset: wordIndex + characterEndCount - wordComponents[wordIndex].count, in: self)
let end = String.Index(utf16Offset: wordIndex + characterEndCount, in: self)
return start..<end
}
}

Index of a substring in a string with Swift

I'm used to do this in JavaScript:
var domains = "abcde".substring(0, "abcde".indexOf("cd")) // Returns "ab"
Swift doesn't have this function, how to do something similar?

edit/update:
Xcode 11.4 • Swift 5.2 or later
import Foundation
extension StringProtocol {
func index<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> Index? {
range(of: string, options: options)?.lowerBound
}
func endIndex<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> Index? {
range(of: string, options: options)?.upperBound
}
func indices<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Index] {
ranges(of: string, options: options).map(\.lowerBound)
}
func ranges<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Range<Index>] {
var result: [Range<Index>] = []
var startIndex = self.startIndex
while startIndex < endIndex,
let range = self[startIndex...]
.range(of: string, options: options) {
result.append(range)
startIndex = range.lowerBound < range.upperBound ? range.upperBound :
index(range.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
}
return result
}
}
usage:
let str = "abcde"
if let index = str.index(of: "cd") {
let substring = str[..<index] // ab
let string = String(substring)
print(string) // "ab\n"
}
let str = "Hello, playground, playground, playground"
str.index(of: "play") // 7
str.endIndex(of: "play") // 11
str.indices(of: "play") // [7, 19, 31]
str.ranges(of: "play") // [{lowerBound 7, upperBound 11}, {lowerBound 19, upperBound 23}, {lowerBound 31, upperBound 35}]
case insensitive sample
let query = "Play"
let ranges = str.ranges(of: query, options: .caseInsensitive)
let matches = ranges.map { str[$0] } //
print(matches) // ["play", "play", "play"]
regular expression sample
let query = "play"
let escapedQuery = NSRegularExpression.escapedPattern(for: query)
let pattern = "\\b\(escapedQuery)\\w+" // matches any word that starts with "play" prefix
let ranges = str.ranges(of: pattern, options: .regularExpression)
let matches = ranges.map { str[$0] }
print(matches) // ["playground", "playground", "playground"]

Using String[Range<String.Index>] subscript you can get the sub string. You need starting index and last index to create the range and you can do it as below
let str = "abcde"
if let range = str.range(of: "cd") {
let substring = str[..<range.lowerBound] // or str[str.startIndex..<range.lowerBound]
print(substring) // Prints ab
}
else {
print("String not present")
}
If you don't define the start index this operator ..< , it take the starting index. You can also use str[str.startIndex..<range.lowerBound] instead of str[..<range.lowerBound]

Swift 5
Find index of substring
let str = "abcdecd"
if let range: Range<String.Index> = str.range(of: "cd") {
let index: Int = str.distance(from: str.startIndex, to: range.lowerBound)
print("index: ", index) //index: 2
}
else {
print("substring not found")
}
Find index of Character
let str = "abcdecd"
if let firstIndex = str.firstIndex(of: "c") {
let index = str.distance(from: str.startIndex, to: firstIndex)
print("index: ", index) //index: 2
}
else {
print("symbol not found")
}

In Swift 4 :
Getting Index of a character in a string :
let str = "abcdefghabcd"
if let index = str.index(of: "b") {
print(index) // Index(_compoundOffset: 4, _cache: Swift.String.Index._Cache.character(1))
}
Creating SubString (prefix and suffix) from String using Swift 4:
let str : String = "ilike"
for i in 0...str.count {
let index = str.index(str.startIndex, offsetBy: i) // String.Index
let prefix = str[..<index] // String.SubSequence
let suffix = str[index...] // String.SubSequence
print("prefix \(prefix), suffix : \(suffix)")
}
Output
prefix , suffix : ilike
prefix i, suffix : like
prefix il, suffix : ike
prefix ili, suffix : ke
prefix ilik, suffix : e
prefix ilike, suffix :
If you want to generate a substring between 2 indices , use :
let substring1 = string[startIndex...endIndex] // including endIndex
let subString2 = string[startIndex..<endIndex] // excluding endIndex

Doing this in Swift is possible but it takes more lines, here is a function indexOf() doing what is expected:
func indexOf(source: String, substring: String) -> Int? {
let maxIndex = source.characters.count - substring.characters.count
for index in 0...maxIndex {
let rangeSubstring = source.startIndex.advancedBy(index)..<source.startIndex.advancedBy(index + substring.characters.count)
if source.substringWithRange(rangeSubstring) == substring {
return index
}
}
return nil
}
var str = "abcde"
if let indexOfCD = indexOf(str, substring: "cd") {
let distance = str.startIndex.advancedBy(indexOfCD)
print(str.substringToIndex(distance)) // Returns "ab"
}
This function is not optimized but it does the job for short strings.

There are three closely connected issues here:
All the substring-finding methods are over in the Cocoa NSString world (Foundation)
Foundation NSRange has a mismatch with Swift Range; the former uses start and length, the latter uses endpoints
In general, Swift characters are indexed using String.Index, not Int, but Foundation characters are indexed using Int, and there is no simple direct translation between them (because Foundation and Swift have different ideas of what constitutes a character)
Given all that, let's think about how to write:
func substring(of s: String, from:Int, toSubstring s2 : String) -> Substring? {
// ?
}
The substring s2 must be sought in s using a String Foundation method. The resulting range comes back to us, not as an NSRange (even though this is a Foundation method), but as a Range of String.Index (wrapped in an Optional, in case we didn't find the substring at all). However, the other number, from, is an Int. Thus we cannot form any kind of range involving them both.
But we don't have to! All we have to do is slice off the end of our original string using a method that takes a String.Index, and slice off the start of our original string using a method that takes an Int. Fortunately, such methods exist! Like this:
func substring(of s: String, from:Int, toSubstring s2 : String) -> Substring? {
guard let r = s.range(of:s2) else {return nil}
var s = s.prefix(upTo:r.lowerBound)
s = s.dropFirst(from)
return s
}
Or, if you prefer to be able to apply this method directly to a string, like this...
let output = "abcde".substring(from:0, toSubstring:"cd")
...then make it an extension on String:
extension String {
func substring(from:Int, toSubstring s2 : String) -> Substring? {
guard let r = self.range(of:s2) else {return nil}
var s = self.prefix(upTo:r.lowerBound)
s = s.dropFirst(from)
return s
}
}

Swift 5
let alphabet = "abcdefghijklmnopqrstuvwxyz"
var index: Int = 0
if let range: Range<String.Index> = alphabet.range(of: "c") {
index = alphabet.distance(from: alphabet.startIndex, to: range.lowerBound)
print("index: ", index) //index: 2
}

Swift 5
extension String {
enum SearchDirection {
case first, last
}
func characterIndex(of character: Character, direction: String.SearchDirection) -> Int? {
let fn = direction == .first ? firstIndex : lastIndex
if let stringIndex: String.Index = fn(character) {
let index: Int = distance(from: startIndex, to: stringIndex)
return index
} else {
return nil
}
}
}
tests:
func testFirstIndex() {
let res = ".".characterIndex(of: ".", direction: .first)
XCTAssert(res == 0)
}
func testFirstIndex1() {
let res = "12345678900.".characterIndex(of: "0", direction: .first)
XCTAssert(res == 9)
}
func testFirstIndex2() {
let res = ".".characterIndex(of: ".", direction: .last)
XCTAssert(res == 0)
}
func testFirstIndex3() {
let res = "12345678900.".characterIndex(of: "0", direction: .last)
XCTAssert(res == 10)
}

In the Swift version 3, String doesn't have functions like -
str.index(of: String)
If the index is required for a substring, one of the ways to is to get the range. We have the following functions in the string which returns range -
str.range(of: <String>)
str.rangeOfCharacter(from: <CharacterSet>)
str.range(of: <String>, options: <String.CompareOptions>, range: <Range<String.Index>?>, locale: <Locale?>)
For example to find the indexes of first occurrence of play in str
var str = "play play play"
var range = str.range(of: "play")
range?.lowerBound //Result : 0
range?.upperBound //Result : 4
Note : range is an optional. If it is not able to find the String it will make it nil. For example
var str = "play play play"
var range = str.range(of: "zoo") //Result : nil
range?.lowerBound //Result : nil
range?.upperBound //Result : nil

Leo Dabus's answer is great. Here is my answer based on his answer using compactMap to avoid Index out of range error.
Swift 5.1
extension StringProtocol {
func ranges(of targetString: Self, options: String.CompareOptions = [], locale: Locale? = nil) -> [Range<String.Index>] {
let result: [Range<String.Index>] = self.indices.compactMap { startIndex in
let targetStringEndIndex = index(startIndex, offsetBy: targetString.count, limitedBy: endIndex) ?? endIndex
return range(of: targetString, options: options, range: startIndex..<targetStringEndIndex, locale: locale)
}
return result
}
}
// Usage
let str = "Hello, playground, playground, playground"
let ranges = str.ranges(of: "play")
ranges.forEach {
print("[\($0.lowerBound.utf16Offset(in: str)), \($0.upperBound.utf16Offset(in: str))]")
}
// result - [7, 11], [19, 23], [31, 35]

Have you considered using NSRange?
if let range = mainString.range(of: mySubString) {
//...
}

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Swift extract string between strings using regex - swift

Related

Swift 4 How to find Index of a particular substring in String [duplicate]

How to split a string at the last occurence of a sequence

Split string by components and keep components in place

Find the Range of the Nth word in a String

Index of a substring in a string with Swift

Categories

Resources