Reverse Words with exclusion rules like anagrams and numbers - swift

Need some help!
I have a model function in a simple project for reversing words.
The questions is:
First - How can I reverse my words (sentences) to make an anagram? For example, I need to shift the first character in each reversed word on two positions by forward.
Example:
Original - Hello world
Reversed with anagrams: - lloeH lrdow
Second - How can I reverse my words (sentences) without numbers inside it?
Example:
Original - Hello world 1234
Reversed without numbers - olleH dlrow 1234
How can I do it in my code?
import Foundation
class ReverseWords {
public func reverse(textField: String) -> String {
if textField.isEmpty {
return ""
}
return textField.trimmingCharacters(in: .whitespacesAndNewlines)
.components(separatedBy: " ")
.map { String ( $0.reversed() ) }
.joined(separator: " ")
}
}

Check this out.
func anagram(sentence: String, separator: Character = " ") -> String {
sentence
.trimmingCharacters(in: .whitespacesAndNewlines)
.split(separator: separator)
.map { $0.contains(where: { $0.isNumber }) ? String($0) : String($0).reverseAndShift() }
.joined(separator: String(separator))
}
extension String {
func reverseAndShift(by offset: Int = 2) -> String {
var reversed = Array(reversed())
if let firstElement = reversed.first, offset < reversed.count {
reversed.removeFirst()
reversed.insert(firstElement, at: offset)
}
return String(reversed)
}
}
print(anagram(sentence: "Hello world")) -> lloeH lrdow
print(anagram(sentence: "Hello world 1234")) -> lloeH lrdow 1234

It’s helpful to break your problem into independently testable functions.
Here are two functions you should find useful:
func shift2(_ str: String) -> String {
guard str.count > 2 else { return str }
let first = str.prefix(1)
let secondthird = str.prefix(3).suffix(2)
let end = String(str.dropFirst(3))
return secondthird + first + end
}
func reverseWord(_ str: String) -> String {
guard Set(str).intersection(Set("0123456789")).isEmpty else { return str }
return String(str.reversed())
}
Examples
print(shift2("abcde"))
print(shift2("abc"))
print(shift2("ab"))
bcade
bca
ab
print(reverseWord("Hello"))
print(reverseWord("Hello1"))
print(reverseWord("12345"))
olleH
Hello1
12345

Related

swift string separation but include the

I need to separate a string to a array of substring but need to include ",.?!" as the substring in Swift.
var sentence = "What is your name?" into
var words = ["What", "is", "your", "name", "?"]
I know I can use this to separate the white space, but I need the ".,?!" to be separated into a word in the words array. How can I do that?
var words = sentence.components(separatedBy: " ")
I only get ["What", "is", "your", "name?"]
I need to separate the ? at the end word, and make words array like this:
var words = ["What", "is", "your", "name", "?"]
You can enumerate your substrings in range using .byWords options, append the substring to your words array, get the substring range upperBound and the enclosed range upperBound, remove the white spaces on the resulting substring and append it to the words array:
import Foundation
let sentence = "What is your name?"
var words: [String] = []
sentence.enumerateSubstrings(in: sentence.startIndex..., options: .byWords) { substring, range, enclosedRange, _ in
words.append(substring!)
let start = range.upperBound
let end = enclosedRange.upperBound
words += sentence[start..<end]
.split{$0.isWhitespace}
.map(String.init)
}
print(words) // "["What", "is", "your", "name", "?"]\n"
You can also use a regular expression to replace the punctuation by the same punctuation preceded by a space before splitting your words by whitespaces:
let sentence = "What is your name?"
let words = sentence
.replacingOccurrences(of: "[.,;:?!]",
with: " $0",
options: .regularExpression)
.split{$0.isWhitespace}
print(words) // "["What", "is", "your", "name", "?"]\n"
Swift native approach:
var sentence = "What is your name?"
for index in sentence
.indices
.filter({ sentence[$0].isPunctuation })
.reversed() {
sentence.insert(" ", at: index)
}
let words = sentence.split { $0.isWhitespace }
words.forEach { print($0) }
This will print:
What
is
your
name
?
This function will split on whitespace and also include each punctuation character as a separate string. Apostrophes are treated as part of a word, so "can't" and "it's" are kept together as a single string. This function will also handle double spaces and tabs.
func splitSentence(sentence: String) -> [String] {
var result : [String] = []
var word = ""
let si = sentence.startIndex
for i in 0..<sentence.count {
let c = sentence[sentence.index(si, offsetBy: i)]
if c.isWhitespace {
if word.count > 0 {
result.append(word)
word = ""
}
} else if (c.isLetter || (String(c) == "'")) {
word = word + String(c)
} else {
if word.count > 0 {
result.append(word)
word = ""
}
result.append(String(c))
}
}
if word.count > 0 {
result.append(word)
}
return result
}
Here is some testing code:
func test(_ sentence: String, _ answer: [String]) {
print("--------------------------------")
print("sentence=" + sentence)
let result : [String] = splitSentence(sentence: sentence)
for s in result {
print("s={" + s + "}")
}
if answer.count != result.count {
print("#### Answer count mismatch")
}
for i in 0..<answer.count {
if answer[i] != result[i] {
print("### Mismatch: {" + answer[i] + "} != {" + result[i] + "}")
}
}
}
func runTests() {
test("", [])
test(" ", [])
test(" ", [])
test(" a", ["a"])
test("a ", ["a"])
test(" a", ["a"])
test(" a ", ["a"])
test("a ", ["a"])
test("aa", ["aa"])
test("a a", ["a", "a"])
test("?", ["?"])
test("a?", ["a", "?"])
test("???", ["?", "?", "?"])
test("What is your name?", [ "What", "is", "your", "name", "?" ])
test("What is your name? ", [ "What", "is", "your", "name", "?" ])
test("La niña es linda.", [ "La", "niña", "es", "linda", "."])
test("ñññ ñ ññ ñ", [ "ñññ", "ñ", "ññ", "ñ" ])
test("It's the 'best'.", [ "It's", "the", "'best'", "." ])
test("¿Cómo te llamas?", [ "¿", "Cómo", "te", "llamas", "?" ])
test("你好吗?", [ "你好吗", "?" ])
}
XCTAssertEqual(
"¿What is your name? My name is 🐱, and I am a cat!"
.split(separator: " ")
.flatMap { $0.split(includingSeparators: \.isPunctuation) }
.map(Array.init)
.map { String($0) },
[ "¿", "What", "is", "your", "name", "?",
"My", "name", "is", "🐱", ",", "and", "I", "am", "a", "cat", "!"
]
)
public enum Spliteration<Element> {
case separator(Element)
case subSequence([Element])
}
public extension Array {
init(_ spliteration: Spliteration<Element>) {
switch spliteration {
case .separator(let separator):
self = [separator]
case .subSequence(let array):
self = array
}
}
}
public extension Sequence {
/// The first element of the sequence.
/// - Note: `nil` if the sequence is empty.
var first: Element? {
var iterator = makeIterator()
return iterator.next()
}
func split(includingSeparators getIsSeparator: #escaping (Element) -> Bool)
-> AnySequence< Spliteration<Element> > {
var separatorFromPrefixIteration: Element?
func process(next: Element?) -> Void {
separatorFromPrefixIteration =
next.map(getIsSeparator) == true
? next
: nil
}
process(next: first)
let prefixIterator = AnyIterator(
dropFirst(
separatorFromPrefixIteration == nil
? 0
: 1
),
processNext: process
)
return .init {
if let separator = separatorFromPrefixIteration {
separatorFromPrefixIteration = nil
return .separator(separator)
}
return Optional(
prefixIterator.prefix { !getIsSeparator($0) },
nilWhen: \.isEmpty
).map(Spliteration.subSequence)
}
}
}
public extension AnyIterator {
/// Use when `AnyIterator` is required / `UnfoldSequence` can't be used.
init<State>(
state: State,
_ getNext: #escaping (inout State) -> Element?
) {
var state = state
self.init { getNext(&state) }
}
/// Process iterations with a closure.
/// - Parameters:
/// - processNext: Executes with every iteration.
init<Sequence: Swift.Sequence>(
_ sequence: Sequence,
processNext: #escaping (Element?) -> Void
) where Sequence.Element == Element {
self.init( state: sequence.makeIterator() ) { iterator -> Element? in
let next = iterator.next()
processNext(next)
return next
}
}
}
public extension AnySequence {
/// Use when `AnySequence` is required / `AnyIterator` can't be used.
/// - Parameter getNext: Executed as the `next` method of this sequence's iterator.
init(_ getNext: #escaping () -> Element?) {
self.init( Iterator(getNext) )
}
}
public extension Optional {
/// Wraps a value in an optional, based on a condition.
/// - Parameters:
/// - wrapped: A non-optional value.
/// - getIsNil: The condition that will result in `nil`.
init(
_ wrapped: Wrapped,
nilWhen getIsNil: (Wrapped) throws -> Bool
) rethrows {
self = try getIsNil(wrapped) ? nil : wrapped
}
}
Not answering using swift, but I believe the algorithm can be emulated with any language.
Done the implementation using Java. The code uses standard Java libraries and not external ones.
private void setSpecialCharsAsLastArrayItem() {
String name = "?what is your name?";
String regexCompilation = "[$&+,:;=?##|]";
Pattern regex = Pattern.compile(regexCompilation);
Matcher matcher = regex.matcher(name);
StringBuilder regexStr = new StringBuilder();
while (matcher.find()) {
regexStr.append(matcher.group());
}
String stringOfSpecialChars = regexStr.toString();
String stringWithoutSpecialChars = name.replaceAll(regexCompilation, "");
String finalString = stringWithoutSpecialChars + " "+stringOfSpecialChars;
String[] splitString = finalString.split(" ");
System.out.println(Arrays.toString(splitString));
}
will print [what, is, your, name, ??]
Here is the solution (Swift 5):
let sentence = "What is your name?".replacingOccurrences(of: "?", with: " ?")
let words = sentence.split(separator: " ")
print(words)
Output:
["What", "is", "your", "name", "?"]

How to split a string at the last occurence of a sequence

Target: A string with a built-in separator shall be split in an int and another string. In the case that the separator sequence '###' occurs more than once, the string shall always be spliced at the last '###'.
Is there an operator like string.lastIndexOf("###"), like in C#?
This is how my parser looks like:
func parseTuple(from string: String) -> (String, Int)? {
let parsedString = string.components(separatedBy: "###")
if let tupleString = String(parsedString[0]), let tupleInt = Int(parsedString[1]) {
return (tupleString, tupleInt)
} else {
return nil
}
}
The range(of:...) method of String has a .backwards option
to find the last occurrence of a string.
Then substring(to:) and substring(from:) can be used with the
lower/upper bound of that range to extract the parts of the string
preceding/following the separator:
func parseTuple(from string: String) -> (String, Int)? {
if let theRange = string.range(of: "###", options: .backwards),
let i = Int(string.substring(from: theRange.upperBound)) {
return (string.substring(to: theRange.lowerBound), i)
} else {
return nil
}
}
Example:
if let tuple = parseTuple(from: "Connect###Four###Player###7") {
print(tuple)
// ("Connect###Four###Player", 7)
}
Swift 4 update:
func parseTuple(from string: String) -> (String, Int)? {
if let theRange = string.range(of: "###", options: .backwards),
let i = Int(string[theRange.upperBound...]) {
return (String(string[...theRange.lowerBound]), i)
} else {
return nil
}
}
let input = "Connect###Four###Player###7"
let seperator = "###"
// split components
var components = input.components(separatedBy: seperator)
// remove the last component ...
components = Array(components.dropLast())
// ... and re-join the remaining ones
let output = components.joined(separator: seperator)
print(output)
prints:
Connect###Four###Player

Sort a string to determine if it is a Anagram or Palindrome in Swift Xcode

I have a extension names String, with two functions names isAnagramOf and isPalindrome. The first function is supposed to take input as a String, then first it will replace whitespace with no space then sort and compare the string and return a Bool to determine if anagram or not.
The second function named isPalindrome and will also ignore whitespaces and capitalization, it will then reverse the String and compare to return if it is reversed.
I am new to swift and following a tutorial, but I kept getting these errors no matter how I tried to write it. I have gone through it at least 10 times now and cant get it to work
If anyone can help with this code that would be great, I would also be open to someone showing me another way to write. Perhaps as a array first then to sort the string, I am not sure though.
extension String {
func isAnagramOf(_ s: String) -> Bool {
let lowerSelf = self.lowercased().replacingOccurrences(of: " ", with: "")
let lowerOther = s.lowercased().replacingOccurrences(of: " ", with: "")
return lowerSelf.sorted() == lowerOther.sorted() // first error:Value of type 'String' has no member 'sorted
}
func isPalindrome() -> Bool {
let f = self.lowercased().replacingOccurrences(of: " ", with: "")
let s = String(describing: f.reversed()) //second error:Value of type 'String' has no member 'reversed'
return f == s
}
}
In Swift 3 a String itself is not a collection, so you have to
sort or reverse its characters view:
extension String {
func isAnagramOf(_ s: String) -> Bool {
let lowerSelf = self.lowercased().replacingOccurrences(of: " ", with: "")
let lowerOther = s.lowercased().replacingOccurrences(of: " ", with: "")
return lowerSelf.characters.sorted() == lowerOther.characters.sorted()
}
func isPalindrome() -> Bool {
let f = self.lowercased().replacingOccurrences(of: " ", with: "")
return f == String(f.characters.reversed())
}
}
A slightly more efficient method to check for a palindrome is
extension String {
func isPalindrome() -> Bool {
let f = self.lowercased().replacingOccurrences(of: " ", with: "")
return !zip(f.characters, f.characters.reversed()).contains(where: { $0 != $1 })
}
}
because no new String is created, and the function "short-circuits",
i.e. returns as soon as a non-match is found.
In Swift 4 a String is collection of its characters, and
the code simplifies to
extension String {
func isAnagramOf(_ s: String) -> Bool {
let lowerSelf = self.lowercased().replacingOccurrences(of: " ", with: "")
let lowerOther = s.lowercased().replacingOccurrences(of: " ", with: "")
return lowerSelf.sorted() == lowerOther.sorted()
}
func isPalindrome() -> Bool {
let f = self.lowercased().replacingOccurrences(of: " ", with: "")
return !zip(f, f.reversed()).contains(where: { $0 != $1 })
}
}
Note also that
let f = self.lowercased().replacingOccurrences(of: " ", with: "")
returns a string with all space characters removed. If you want
to remove all whitespace (spaces, tabulators, newlines, ...) then use
for example
let f = self.lowercased().replacingOccurrences(of: "\\s", with: "", options: .regularExpression)

Find the Range of the Nth word in a String

What I want is something like
"word1 word2 word3".rangeOfWord(2) => 6 to 10
The result could come as a Range or a tuple or whatever.
I'd rather not do the brute force of iterating over the characters and using a state machine. Why reinvent the lexer? Is there a better way?
In your example, your words are unique, and you can use the following method:
let myString = "word1 word2 word3"
let wordNum = 2
let myRange = myString.rangeOfString(myString.componentsSeparatedByString(" ")[wordNum-1])
// 6..<11
As pointed out by Andrew Duncan in the comments below, the above is only valid if your words are unique. If you have non-unique words, you can use this somewhat less neater method:
let myString = "word1 word2 word3 word2 word1 word3 word1"
let wordNum = 7 // 2nd instance (out of 3) of "word1"
let arr = myString.componentsSeparatedByString(" ")
var fromIndex = arr[0..<wordNum-1].map { $0.characters.count }.reduce(0, combine: +) + wordNum - 1
let myRange = Range<String.Index>(start: myString.startIndex.advancedBy(fromIndex), end: myString.startIndex.advancedBy(fromIndex+arr[wordNum-1].characters.count))
let myWord = myString.substringWithRange(myRange)
// string "word1" (from range 36..<41)
Finally, lets use the latter to construct an extension of String as you have wished for in your question example:
extension String {
private func rangeOfNthWord(wordNum: Int, wordSeparator: String) -> Range<String.Index>? {
let arr = myString.componentsSeparatedByString(wordSeparator)
if arr.count < wordNum {
return nil
}
else {
let fromIndex = arr[0..<wordNum-1].map { $0.characters.count }.reduce(0, combine: +) + (wordNum - 1)*wordSeparator.characters.count
return Range<String.Index>(start: myString.startIndex.advancedBy(fromIndex), end: myString.startIndex.advancedBy(fromIndex+arr[wordNum-1].characters.count))
}
}
}
let myString = "word1 word2 word3 word2 word1 word3 word1"
let wordNum = 7 // 2nd instance (out of 3) of "word1"
if let myRange = myString.rangeOfNthWord(wordNum, wordSeparator: " ") {
// myRange: 36..<41
print(myString.substringWithRange(myRange)) // prints "word1"
}
You can tweak the .rangeOfNthWord(...) method if word separation is not unique (say some words are separated by two blankspaces " ").
Also pointed out in the comments below, the use of .rangeOfString(...) is not, per se, pure Swift. It is, however, by no means bad practice. From Swift Language Guide - Strings and Characters:
Swift’s String type is bridged with Foundation’s NSString class. If
you are working with the Foundation framework in Cocoa, the entire
NSString API is available to call on any String value you create when
type cast to NSString, as described in AnyObject. You can also use a
String value with any API that requires an NSString instance.
See also the NSString class reference for rangeOfString method:
// Swift Declaration:
func rangeOfString(_ searchString: String) -> NSRange
I went ahead and wrote the state machine. (Grumble..) FWIW, here it is:
extension String {
private func halfOpenIntervalOfBlock(n:Int, separator sep:Character? = nil) -> (Int, Int)? {
enum State {
case InSeparator
case InPrecedingSeparator
case InWord
case InTarget
case Done
}
guard n > 0 else {
return nil
}
var state:State
if n == 1 {
state = .InPrecedingSeparator
} else {
state = .InSeparator
}
var separatorNum = 0
var startIndex:Int = 0
var endIndex:Int = 0
for (i, c) in self.characters.enumerate() {
let inSeparator:Bool
// A bit inefficient to keep doing this test.
if let s = sep {
inSeparator = c == s
} else {
inSeparator = c == " " || c == "\n"
}
endIndex = i
switch state {
case .InPrecedingSeparator:
if !inSeparator {
state = .InTarget
startIndex = i
}
case .InTarget:
if inSeparator {
state = .Done
}
case .InWord:
if inSeparator {
separatorNum += 1
if separatorNum == n - 1 {
state = .InPrecedingSeparator
} else {
state = .InSeparator
}
}
case .InSeparator:
if !inSeparator {
state = .InWord
}
case .Done:
break
}
if state == .Done {
break
}
}
if state == .Done {
return (startIndex, endIndex)
} else if state == .InTarget {
return (startIndex, endIndex + 1) // We ran off end.
} else {
return nil
}
}
func rangeOfWord(n:Int) -> Range<Index>? {
guard let (s, e) = self.halfOpenIntervalOfBlock(n) else {
return nil
}
let ss = self.startIndex.advancedBy(s)
let ee = self.startIndex.advancedBy(e)
return Range(start:ss, end:ee)
}
}
It's not really clear whether the string has to be considered divided in words by separators it may contains, or if you're just looking for a specific substring occurrence.
Anyway both cases could be addressed in this way in my opinion:
extension String {
func enumerateOccurencies(of pattern: String, _ body: (Range<String.Index>, inout Bool) throws -> Void) rethrows {
guard
!pattern.isEmpty,
count >= pattern.count
else { return }
var stop = false
var lo = startIndex
while !stop && lo < endIndex {
guard
let r = self[lo..<endIndex].range(of: pattern)
else { break }
try body(r, &stop)
lo = r.upperBound
}
}
}
You'll then set stop to true in the body closure once reached the desired occurrence number and capture the range passed to it:
let words = "word1, word1, word2, word3, word1, word3"
var matches = 0
var rangeOfThirdOccurencyOfWord1: Range<String.Index>? = nil
words.enumerateOccurencies(of: "word1") { range, stop in
matches +=1
stop = matches == 3
if stop {
rangeOfThirdOccurencyOfWord1 = range
}
}
Regarding the DFA: recently I've wrote one leveraging on Hashable and using a an Array of Dictionaries as its state nodes, but I've found that the method above is faster, cause maybe range(of:) uses finger-printing.
UPDATE
Otherwise you could also achieve that API you've mentioned in this way:
import Foundation
extension String {
func rangeOfWord(order: Int, separator: String) -> Range<String.Index>? {
precondition(order > 0)
guard
!isEmpty,
!separator.isEmpty,
separator.count < count
else { return nil }
var wordsSoFar = 0
var lo = startIndex
while let r = self[lo..<endIndex].range(of: separator) {
guard
r.lowerBound != lo
else {
lo = r.upperBound
continue
}
wordsSoFar += 1
guard
wordsSoFar < order
else { return lo..<r.lowerBound }
lo = r.upperBound
}
if
lo < endIndex,
wordsSoFar + 1 == order
{
return lo..<endIndex
}
return nil
}
}
let words = "word anotherWord oneMore lastOne"
if let r = words.rangeOfWord(order: 4, separator: " ") {
print(words[r])
} else {
print("not found")
}
Here order parameter refers to the nth order of the word in the string, starting from 1. I've also added the separator parameter to specify a string token to use for finding words in the string (it can also be defaulted to " " to be able to call the function without having to specify it).
Here's my attempt at an updated answer in Swift 5.5:
import Foundation
extension String {
func rangeOfWord(atPosition wordAt: Int) -> Range<String.Index>? {
let fullrange = self.startIndex..<self.endIndex
var count = 0
var foundAt: Range<String.Index>? = nil
self.enumerateSubstrings(in: fullrange, options: .byWords) { _, substringRange, _, stop in
count += 1
if count == wordAt {
foundAt = substringRange
stop = true // Stop the enumeration after the word range is found.
}
}
return foundAt
}
}
let lorem = "Morbi leo risus, porta ac consectetur ac, vestibulum at eros."
if let found = lorem.rangeOfWord(atPosition: 8) {
print("found: \(lorem[found])")
} else {
print("not found.")
}
This solution doesn't make a new array to contain the words so uses less memory (I have not tested but in theory it should use less memory). As much as possible, the build in method is used therefore less chance of bugs.
Swift 5 solution, which allows you to specify the word separator
extension String {
func rangeOfWord(atIndex wordIndex: Int) -> Range<String.Index>? {
let wordComponents = self.components(separatedBy: " ")
guard wordIndex < wordComponents.count else {
return nil
}
let characterEndCount = wordComponents[0...wordIndex].map { $0.count }.reduce(0, +)
let start = String.Index(utf16Offset: wordIndex + characterEndCount - wordComponents[wordIndex].count, in: self)
let end = String.Index(utf16Offset: wordIndex + characterEndCount, in: self)
return start..<end
}
}

Split a String without removing the delimiter in Swift

This might be a duplicate. I couldn't find the answer in Swift, so I am not sure.
componentsSeparatedByCharactersInSet removes the delimiter. If you separate by only one possible character it is easy to add it back. But what when you have a set?
Is there another method to split?
Swift 3 and 4 Versions
extension Collection {
func splitAt(isSplit: (Iterator.Element) throws -> Bool) rethrows -> [SubSequence] {
var p = self.startIndex
var result:[SubSequence] = try self.indices.flatMap {
i in
guard try isSplit(self[i]) else {
return nil
}
defer {
p = self.index(after: i)
}
return self[p...i]
}
if p != self.endIndex {
result.append(suffix(from: p))
}
return result
}
}
Thanks to Oisdk for getting me thinking.
This method works on CollectionTypes, rather than Strings, but it should be easy enough to adapt:
extension CollectionType {
func splitAt(#noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
var p = startIndex
return try indices
.filter { i in try isSplit(self[i]) }
.map { i in
defer { p = i }
return self[p..<i]
} + [suffixFrom(p)]
}
}
extension CollectionType where Generator.Element : Equatable {
func splitAt(splitter: Generator.Element) -> [SubSequence] {
return splitAt { el in el == splitter }
}
}
You could use it like this:
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
let puncSet = Set("!.,:".characters)
sentence
.characters
.splitAt(puncSet.contains)
.map(String.init)
// ["Hello", ", my name is oisdk", ". This should split", ": but only at punctuation", "!"]
Or, this version, which uses a for-loop, and splits after the delimiter:
extension CollectionType {
func splitAt(#noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
var p = startIndex
var result: [SubSequence] = []
for i in indices where try isSplit(self[i]) {
result.append(self[p...i])
p = i.successor()
}
if p != endIndex { result.append(suffixFrom(p)) }
return result
}
}
extension CollectionType where Generator.Element : Equatable {
func splitAt(splitter: Generator.Element) -> [SubSequence] {
return splitAt { el in el == splitter }
}
}
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
let puncSet = Set("!.,:".characters)
sentence
.characters
.splitAt(puncSet.contains)
.map(String.init)
// ["Hello,", " my name is oisdk.", " This should split:", " but only at punctuation!"]
Or, if you wanted to get the most Swift features into one function (defer, throws, a Protocol extension, an evil flatMap, guard, and Optionals):
extension CollectionType {
func splitAt(#noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
var p = startIndex
var result: [SubSequence] = try indices.flatMap { i in
guard try isSplit(self[i]) else { return nil }
defer { p = i.successor() }
return self[p...i]
}
if p != endIndex { result.append(suffixFrom(p)) }
return result
}
}
I came here looking for an answer to this question. Didn't find what I was looking for and ended up building this by repeated calls to .split(...) It isn't elegant but you can choose which delimiters are preserved and which aren't. There's probably a way to avoid the String <--> Substring conversions, anyone know?
var input = """
{All those moments will be (lost in time)},
like tears [in rain](. ([(Time to)] die))
"""
var separator: Character = "!"
var output: [String] = []
repeat {
let tokens = input.split(
maxSplits: 1,
omittingEmptySubsequences: false,
whereSeparator: {
switch $0 {
case "{", "}", "(", ")", "[", "]": // preserve
separator = $0; return true
case " ", "\n", ",", ".": // omit
separator = " "; return true
default:
return false
}
}
)
if tokens[0] != "" {
output.append(String(tokens[0]))
}
guard tokens.count == 2 else { break }
if separator != " " {
output.append(String(separator))
}
input = String(tokens[1])
} while true
for token in output { print("\(token)") }
In the case above, the selectors are not in actual sets. I didn't need that, but if you do, simply make these declarations,
let preservedDelimiters: Set<Character> = [ "{", "}", "(", ")", "[", "]" ]
let omittedDelimiters: Set<Character> = [ " ", "\n", ",", "." ]
and replace the whereSeparator function with:
whereSeparator: {
if preservedDelimiters.contains($0) {
separator = $0
return true
} else if omittedDelimiters.contains($0) {
separator = " "
return true
} else {
return false
}
}