Find the repeated sequence in the line that go in a row - swift

Given a string of arbitrary length, I need to find a substring that is immediately repeated, i.e. two identical copies of it appear back to back.
My function (there are two of them, but these are two parts of the same function) turned out to be complex and cumbersome and did not fit because of this. The function I need should be simple and not too long.
Example:
Input : str = "abcabc"
Output : abc
Input : str = "aa"
Output : a
Input : str = "abcbabcb"
Output : abcb
Input : str = "abcbca"
Output : bcbc
Input : str = "cbabc"
Output :
Input : str = "acbabc"
Output :
My unsuccessful function:
/// Counts how often every non-empty substring of `p` occurs.
///
/// - Parameter p: The string to analyse.
/// - Returns: A dictionary mapping each non-empty substring to its number of
///   occurrences (overlapping occurrences included). Empty for an empty input.
func findRepetition(_ p: String) -> [String:Int] {
    // Convert once so slicing by integer offsets is cheap.
    let chars = Array(p)
    var repDict: [String:Int] = [:]
    for start in chars.indices {
        // `end` is exclusive and starts at start + 1, so the empty string is
        // never recorded and substrings reaching the last character are kept.
        for end in (start + 1)...chars.count {
            repDict[String(chars[start..<end]), default: 0] += 1
        }
    }
    return repDict
}
// Words accepted / rejected so far by `getRepeats`.
var correctWords = [String]()
var wrongWords = [String]()
/// Classifies `p` and records it in `correctWords` or `wrongWords`.
///
/// - Returns: `false` when some key produced by `findRepetition(p)`,
///   written twice in a row, is contained in `p`; `true` otherwise.
func getRepeats(_ p: String) -> Bool {
    let doubledUnitFound = findRepetition(p).keys.contains { unit in
        p.contains(unit + unit)
    }
    if doubledUnitFound {
        wrongWords.append(p)
        return false
    }
    correctWords.append(p)
    return true
}
I will be very grateful for your help!

Here's a solution using regular expression. I used a capture group that tries to match as many characters as possible such that the whole group repeats at least once.
import Foundation
/// Finds the first run of lowercase letters that is immediately repeated.
///
/// - Parameter s: The string to search.
/// - Returns: The repeated unit (capture group 1), or `nil` when `s` is empty,
///   the regex cannot be built, or nothing matches.
func findRepetition(_ s: String) -> String? {
    guard !s.isEmpty else { return nil }
    guard let regex = try? NSRegularExpression(pattern: "([a-z]+)\\1+", options: []) else {
        return nil
    }
    // NSRegularExpression works on UTF-16 offsets.
    let wholeString = NSRange(location: 0, length: s.utf16.count)
    guard let match = regex.firstMatch(in: s, options: [], range: wholeString) else {
        return nil
    }
    return (s as NSString).substring(with: match.range(at: 1))
}
print(findRepetition("abcabc")) //prints abc
print(findRepetition("aa")) //prints a
print(findRepetition("abcbabcb")) //prints abcb
print(findRepetition("abcbca")) //prints bc
print(findRepetition("cbabc")) //prints nil
print(findRepetition("acbabc")) //prints nil

/// Counts the occurrences of every substring whose length is between 1 and
/// half the length of `p`.
///
/// Works on `Character`s rather than UTF-16 offsets, so emoji and other
/// multi-unit characters are sliced correctly.
///
/// - Parameter p: The string to analyse.
/// - Returns: Substring → occurrence count; empty when `p` has fewer than two characters.
func findRepetitions(_ p : String) -> [String: Int] {
    let chars = Array(p)
    var result: [String: Int] = [:]
    guard chars.count >= 2 else { return result }
    for length in 1...(chars.count / 2) {
        for start in 0...(chars.count - length) {
            result[String(chars[start..<(start + length)]), default: 0] += 1
        }
    }
    return result
}
This is for finding repetitions of possible substrings in your string. Hope it can help

Here is a solution that is based on the Suffix Array Algorithm, that finds the longest substring that is repeated (contiguously):
/// Returns the longest substring `X` such that `XX` occurs contiguously in `str`.
///
/// For each candidate length (longest first) the string is scanned once,
/// tracking how many consecutive positions satisfy `chars[i] == chars[i + length]`.
/// A run of `length` such positions means a unit of that length is immediately
/// followed by a copy of itself. Overall cost is O(n²) character comparisons.
///
/// - Parameter str: The string to search.
/// - Returns: The leftmost longest repeated unit, or `""` when there is none.
func longestRepeatedSubstring(_ str: String) -> String {
    let chars = Array(str)
    guard chars.count >= 2 else { return "" }
    for length in stride(from: chars.count / 2, through: 1, by: -1) {
        var run = 0
        for i in 0..<(chars.count - length) {
            run = chars[i] == chars[i + length] ? run + 1 : 0
            if run == length {
                // The run ends at i, so the unit starts length - 1 positions earlier.
                let start = i - length + 1
                return String(chars[start..<(start + length)])
            }
        }
    }
    return ""
}
Here is an easy to understand tutorial about such an algorithm.
It works for these examples:
longestRepeatedSubstring("abcabc") //"abc"
longestRepeatedSubstring("aa") //"a"
longestRepeatedSubstring("abcbabcb") //"abcb"
longestRepeatedSubstring("abcbca") //"bc"
longestRepeatedSubstring("cbabc") //""
longestRepeatedSubstring("acbabc") //""
As well as these:
longestRepeatedSubstring("a😍ca😍c") //"a😍c"
longestRepeatedSubstring("Ab cdAb cd") //"Ab cd"
longestRepeatedSubstring("aabcbc") //"bc"
Benchmarks
Here is a benchmark that clearly shows that the Suffix Array algorithm is much faster than using a regular expression.
The result is:
Regular expression: 7.2 ms
Suffix Array : 0.1 ms

Related

How to solve a problem with using the method of branches and borders?

All words of the ternary language consist of only 3 letters (a, b, and c), and all have a strictly specified length N. A word is considered correct if it does not contain two identical substrings placed directly next to each other. For example, abcacb is a correct word, while ababc is not, because the substring ab is immediately followed by another ab.
I tried to solve the problem with a complete enumeration of all possible combinations and a function that looked for a repeating sequence. However, this turned out to be the wrong decision. The problem needs to be solved somehow using the branch and bound method. I have absolutely no idea how this problem can be solved by this method. I would be very happy if someone provides examples or explains to me. I have already spent six days to solve this problem and am very tired.
My wrong solution:
import Foundation
/// Counts how often every non-empty substring of `p` occurs.
///
/// - Parameter p: The string to analyse.
/// - Returns: A dictionary mapping each non-empty substring to its number of
///   occurrences (overlapping occurrences included). Empty for an empty input.
func findRepetition(_ p: String) -> [String:Int] {
    // Convert once so slicing by integer offsets is cheap.
    let chars = Array(p)
    var repDict: [String:Int] = [:]
    for start in chars.indices {
        // `end` is exclusive and starts at start + 1, so the empty string is
        // never recorded and substrings reaching the last character are kept.
        for end in (start + 1)...chars.count {
            repDict[String(chars[start..<end]), default: 0] += 1
        }
    }
    return repDict
}
// Words accepted / rejected so far by `getRepeats`.
var correctWords = [String]()
var wrongWords = [String]()
/// Classifies `p` and records it in `correctWords` or `wrongWords`.
///
/// - Returns: `false` when some key produced by `findRepetition(p)`,
///   written twice in a row, is contained in `p`; `true` otherwise.
func getRepeats(_ p: String) -> Bool {
    let doubledUnitFound = findRepetition(p).keys.contains { unit in
        p.contains(unit + unit)
    }
    if doubledUnitFound {
        wrongWords.append(p)
        return false
    }
    correctWords.append(p)
    return true
}
// Number of words accepted by `getRepeats` during the enumeration.
var counter = 0
/// Recursively fills `data[index...last]` with every combination of letters
/// from `string` and passes each complete word to `getRepeats`, incrementing
/// `counter` for every word it accepts.
///
/// - Parameters:
///   - string: The alphabet to draw letters from.
///   - data: Working buffer holding one single-letter string per position.
///   - last: Index of the final position to fill.
///   - index: Position being filled by this call.
func allLexicographicRecur (_ string: [String.Element], _ data: [String], _ last: Int, _ index: Int){
    // Nothing to enumerate when the position is outside the buffer; this also
    // avoids an out-of-range write for zero-length words.
    guard data.indices.contains(index) else { return }
    var data = data
    // Iterating the letters directly is safe for an empty alphabet,
    // unlike a `0...count-1` range, which traps.
    for letter in string {
        data[index] = String(letter)
        if index == last {
            if getRepeats(data.joined()) {
                counter += 1
            }
        } else {
            allLexicographicRecur(string, data, last, index + 1)
        }
    }
}
/// Enumerates all words of length `l` over the alphabet "abc", classifies them
/// with `getRepeats`, and prints the number of correct words plus both word lists.
///
/// - Parameter l: The word length. Values below 1 produce no words.
func threeLanguage(_ l: Int) {
    // Start from a clean slate so repeated calls do not accumulate results.
    counter = 0
    correctWords.removeAll()
    wrongWords.removeAll()
    if l > 0 {
        let alphabet = "abc"
        let data = Array(repeating: "", count: l)
        allLexicographicRecur(alphabet.sorted(), data, l - 1, 0)
    }
    print("The specified word length: \(l), the number of correct words: \(counter)\n")
    print("Correct words:\n\(correctWords)\n")
    print("Wrong words:\n\(wrongWords)")
}
threeLanguage(3)
Example:
abca is the right word.
abab is wrong (ab).
aaaa is also wrong (a).
abcabc is also incorrect (abc).
If I understood your problem correctly, you need to split your input string into parts of length N and check each part against your rules. Something like this:
// Length of each chunk the string is cut into.
let constant: Int = 3
extension String {
    /// Cuts the string into consecutive pieces of `length` characters;
    /// the final piece may be shorter.
    private func components(withLength length: Int) -> [String] {
        var pieces: [String] = []
        for offset in stride(from: 0, to: count, by: length) {
            let lower = index(startIndex, offsetBy: offset)
            let upper = index(lower, offsetBy: length, limitedBy: endIndex) ?? endIndex
            pieces.append(String(self[lower ..< upper]))
        }
        return pieces
    }
    /// Number of `constant`-sized chunks that occur exactly once in the string.
    ///
    /// A chunk is rejected when removing all of its occurrences shortens the
    /// string by something other than one chunk length.
    var numberOfValidWords: Int {
        let expectedLength = count - constant
        let chunks = components(withLength: constant)
        var rejected = 0
        for chunk in chunks where replacingOccurrences(of: chunk, with: "").count != expectedLength {
            print("as is lengths are not equal, this part is met in string several times")
            rejected += 1
        }
        return chunks.count - rejected
    }
}
Hope it will be helpful

How to remove duplicate characters from a string in Swift

ruby has the function string.squeeze, but I can't seem to find a swift equivalent.
For example I want to turn bookkeeper -> bokepr
Is my only option to create a set of the characters and then pull the characters from the set back to a string?
Is there a better way to do this?
Edit/update: Swift 4.2 or later
You can use a set to filter your duplicated characters:
let str = "bookkeeper"
var set = Set<Character>()
let squeezed = str.filter{ set.insert($0).inserted }
print(squeezed) // "bokepr"
Or as an extension on RangeReplaceableCollection which will also extend String and Substrings as well:
extension RangeReplaceableCollection where Element: Hashable {
    /// A copy of the collection keeping only the first occurrence of each
    /// element, in original order.
    var squeezed: Self {
        var seen: Set<Element> = []
        var kept = Self()
        for element in self where seen.insert(element).inserted {
            kept.append(element)
        }
        return kept
    }
}
let str = "bookkeeper"
print(str.squeezed) // "bokepr"
print(str[...].squeezed) // "bokepr"
I would use this piece of code from another answer of mine, which removes all duplicates of a sequence (keeping only the first occurrence of each), while maintaining order.
extension Sequence where Iterator.Element: Hashable {
    /// Returns the elements in original order with every repeat of an
    /// already-seen element removed.
    func unique() -> [Iterator.Element] {
        var seen: Set<Iterator.Element> = []
        var firstOccurrences: [Iterator.Element] = []
        for element in self {
            if seen.insert(element).inserted {
                firstOccurrences.append(element)
            }
        }
        return firstOccurrences
    }
}
I would then wrap it with some logic that turns a String into a sequence (by getting its characters), removes the duplicates, and then converts the result back into a string:
extension String {
    /// Returns the string with every repeated character removed, keeping the
    /// first occurrence of each character in its original position.
    ///
    /// Iterates the string directly; the old `characters` view is no longer
    /// available in current Swift.
    func uniqueCharacters() -> String {
        var seen = Set<Character>()
        return filter { seen.insert($0).inserted }
    }
}
print("bookkeeper".uniqueCharacters()) // => "bokepr"
Here is a solution I found online, however I don't think it is optimal.
/// Removes duplicate letters so that every letter a–z appears at most once and
/// the result is the lexicographically smallest such subsequence.
///
/// The input is lowercased first. Characters outside a–z are ignored instead of
/// producing an out-of-range array index.
///
/// - Parameter s: The input string.
/// - Returns: The reduced string; `""` when `s` contains no letters a–z.
func removeDuplicateLetters(_ s: String) -> String {
    // Keep only a–z, paired with their slot 0...25 in the bookkeeping arrays.
    var letters: [(character: Character, slot: Int)] = []
    for character in s.lowercased() {
        if let value = asciiValueOfCharacter(character), (97...122).contains(value) {
            letters.append((character, value - 97))
        }
    }
    var counts = [Int](repeating: 0, count: 26)      // remaining occurrences
    var visited = [Bool](repeating: false, count: 26) // currently on the stack
    var stack: [(character: Character, slot: Int)] = []
    for letter in letters {
        counts[letter.slot] += 1
    }
    for letter in letters {
        counts[letter.slot] -= 1
        if visited[letter.slot] {
            continue
        }
        // Pop larger letters that still occur later; they can be re-added
        // after the current, smaller letter.
        while let top = stack.last, letter.slot < top.slot, counts[top.slot] != 0 {
            visited[top.slot] = false
            stack.removeLast()
        }
        stack.append(letter)
        visited[letter.slot] = true
    }
    return String(stack.map { $0.character })
}
/// Returns the ASCII code of `character`, or `nil` when it is not an ASCII character.
func asciiValueOfCharacter(_ character: Character) -> Int? {
    return character.asciiValue.map { Int($0) }
}
Here is one way to do this using reduce(),
// Build the result left to right, appending a character only when it has not
// been kept already. `str` is iterated directly because the `characters` view
// is no longer available in current Swift.
let newChar = str.reduce("") { partial, char in
    partial.contains(char) ? partial : partial + String(char)
}
As suggested by Leo, here is a bit shorter version of the same approach,
// Same approach in one line; iterates `str` directly instead of the removed `characters` view.
let newChar = str.reduce("") { $0.contains($1) ? $0 : $0 + String($1) }
Just Another solution
let str = "Bookeeper"
// Keep each character only the first time it is seen.
let newChar = str.reduce("") { $0.contains($1) ? $0 : "\($0)\($1)" }
// Print the de-duplicated result (the original printed `str` itself).
print(newChar) // "Bokepr"
Use filter and contains to remove duplicate values
let str = "bookkeeper"
// A constant cannot refer to itself inside its own initializer, so the result
// is accumulated in a variable instead.
var result = ""
for character in str where !result.contains(character) {
    result.append(character)
}
print(result) //bokepr

string replace substring without NSString API

I would like to be able to find and replace occurrences of a substring in a native Swift string without bridging to the NS class. How can I accomplish this?
This is not a duplicate of this question, as that question is about replacing a single character. This question is about finding and replacing a substring, which may contain many characters.
Method without Foundation:
extension String {
    /// Returns a copy of the string in which every non-overlapping occurrence
    /// of `oldString` (scanning left to right) is replaced by `newString`.
    ///
    /// - Parameters:
    ///   - oldString: The text to search for. An empty value returns `self` unchanged.
    ///   - newString: The replacement; may be empty to delete occurrences.
    /// - Returns: The string with all replacements applied.
    func replacing(_ oldString: String, with newString: String) -> String {
        guard !oldString.isEmpty else { return self }
        let source = Array(self)
        let pattern = Array(oldString)
        guard source.count >= pattern.count else { return self }
        var result = ""
        var position = 0
        while position < source.count {
            let end = position + pattern.count
            if end <= source.count, source[position..<end].elementsEqual(pattern) {
                result += newString
                position = end
            } else {
                // Re-testing from the very next character means a failed
                // partial match never hides a later real match, and no
                // trailing characters are lost.
                result.append(source[position])
                position += 1
            }
        }
        return result
    }
}
Example usage:
let myString = "Hello World HelHelloello Hello HellHellooo"
print(myString.replacing("Hello", with: "Hi"))
Output:
Hi World HelHiello Hi HellHioo
Method using Foundation:
You can use the replacingOccurrences method on the String struct.
let myString = "Hello World"
let newString = myString.replacingOccurrences(of: "World", with: "Everyone")
print(newString) // prints "Hello Everyone"
generic and pure Swift approach
/// Splits `s` at every non-overlapping occurrence of the element sequence `by`.
///
/// - Parameters:
///   - s: The collection to split.
///   - by: The separator sequence. When it is empty or longer than `s`, the
///     result is `[s]`.
/// - Returns: The pieces between separators, in order. Empty pieces are kept,
///   so the result always has one more element than the number of separators found.
func splitBy<T: RangeReplaceableCollection>(_ s:T, by:T)->[T] where T.Iterator.Element:Equatable {
    let source = Array(s)
    let separator = Array(by)
    guard !separator.isEmpty, source.count >= separator.count else { return [s] }
    var pieces = [T]()
    var current = T()
    var position = 0
    while position < source.count {
        let end = position + separator.count
        if end <= source.count, source[position..<end].elementsEqual(separator) {
            pieces.append(current)
            current = T()
            position = end
        } else {
            // Advance by one element so a failed partial match cannot mask a
            // separator that starts inside it.
            current.append(source[position])
            position += 1
        }
    }
    pieces.append(current)
    return pieces
}
/// Splits the string `s` at every occurrence of the substring `by`.
///
/// `String` is passed to `splitBy` directly; the old `characters` view is no
/// longer available in current Swift.
func split(_ s:String, by:String)->[String] {
    return splitBy(s, by: by)
}
extension RangeReplaceableCollection where Self.Iterator.Element: Equatable {
    /// Splits the collection at every occurrence of the element sequence `by`.
    func split(by : Self)->[Self] {
        return splitBy(self, by: by)
    }
}
how to use it?
let str = "simple text where i would like to replace something with anything"
let pat = "something"
let rep = "anything"
// Split on the pattern, then join the pieces with the replacement.
// `String` is itself a collection of characters, so no `characters` view is needed.
let s0 = str.split(by: pat)
let res = s0.joined(separator: rep)
print(res) // simple text where i would like to replace anything with anything
let res2 = split(str, by: pat).joined(separator: rep)
print(res2) // simple text where i would like to replace anything with anything
let arr = [1,2,3,4,1,2,3,4,1,2,3]
let p = [4,1]
print(arr.split(by: p)) // [[1, 2, 3], [2, 3], [2, 3]]

How to sort array according to number of occurrence of string?

How to sort array according to number of occurrence of string
Example :
var array = ["Hello","Me","That","Me","Hello","Me","as","the"]
and sorted array should be like this
["Me","Hello","That","as","the"]
Updated For Swift 3
var array = ["Hello","Me","That","Me","Hello","Me","as","the"]
// Occurrence count and first position of each distinct string.
var counts:[String:Int] = [:]
var firstPosition:[String:Int] = [:]
for (position, item) in array.enumerated() {
    counts[item, default: 0] += 1
    if firstPosition[item] == nil {
        firstPosition[item] = position
    }
}
print(counts)
// Most frequent first; ties keep their first-appearance order. Dictionary
// iteration order is unspecified, so the tie-break makes the result deterministic.
let result = counts.keys.sorted {
    (-counts[$0, default: 0], firstPosition[$0, default: 0])
        < (-counts[$1, default: 0], firstPosition[$1, default: 0])
}
print(result)
array = result
print(array)
This is what I have been able to come up with:
var array = ["Hello","Me","That","Me","Hello","Me","as","the"]
// Record the occurrences of each item, plus where it first appears.
var dict = [String: Int]()
var firstPosition = [String: Int]()
for (position, item) in array.enumerated() {
    dict[item, default: 0] += 1
    if firstPosition[item] == nil {
        firstPosition[item] = position
    }
}
// Sort the distinct strings by descending occurrence count. Equal counts fall
// back to first-appearance order, because dictionary order is unspecified.
let result = dict.keys.sorted {
    (-dict[$0, default: 0], firstPosition[$0, default: 0])
        < (-dict[$1, default: 0], firstPosition[$1, default: 0])
}
Try this -
It is tested and working as expected --
let arrayName = ["Hello","Me","That","Me","Hello","Me","as","the"]
var counts:[String:Int] = [:]
for item in arrayName {
counts[item] = (counts[item] ?? 0) + 1
}
let array = counts.keysSortedByValue(isOrderedBefore: >)
print(array) // Output - ["Me", "Hello", "the", "That", "as"]
Create Dictionary extension -
extension Dictionary {
    /// Returns the keys ordered by `isOrderedBefore`.
    func sortedKeys(isOrderedBefore:(Key,Key) -> Bool) -> [Key] {
        return keys.sorted(by: isOrderedBefore)
    }
    /// Returns the keys ordered by comparing their values with `isOrderedBefore`.
    ///
    /// Sorts the key/value pairs themselves, so no per-comparison dictionary
    /// lookups are needed.
    func keysSortedByValue(isOrderedBefore:(Value, Value) -> Bool) -> [Key] {
        let orderedPairs = self.sorted { lhs, rhs in
            isOrderedBefore(lhs.value, rhs.value)
        }
        return orderedPairs.map { $0.key }
    }
}
It looks simple.
1. Take distinct from your array.
2. Make count according to distinct list.
3. Save results in collection - ie Dictionary.
4. Sort new collection.
Loop through the array and maintain a word count dictionary. Make sure the dictionary can be sorted based on values and finally obtain the set of keys and transform it back into an array.
This should work.
var array = ["Hello","Me","That","Me","Hello","Me","as","the"]
var tR : [String : Int] = [:]
let finalResult = array.reduce(tR) { result, item in
var tArr : [String: Int] = result
if let count = tArr[item] {
tArr[item] = count+1
} else {
tArr[item] = 1
}
return tArr
}
.sorted(by: { item1, item2 in
return item1.value > item2.value
}).map() { $0.key }
Please try this, hope it helps
var terms = ["Hello","Me","That","Me","Hello","Me","as","the"]
// Count each term exactly once per occurrence. (A second counting pass over
// `terms` would double every frequency.)
var termFrequencies = [String: Int]()
for t in terms {
    termFrequencies[t, default: 0] += 1
}
// Most frequent term first. The relative order of terms with equal counts is
// unspecified, because it follows dictionary iteration order.
let result = termFrequencies.sorted{$0.1 > $1.1}.map{$0.0}

Finding the first non-repeating character in a String using Swift

This finds the duplicates in the array, but i'm looking for something that finds the first non-repeating character in a string. I've been trying to figure out a way to do this and I cannot figure it out. This is the closest i've gotten.
var strArray = ["P","Q","R","S","T","P","R","A","T","B","C","P","P","P","P","P","C","P","P","J"]
print(strArray)
// Strings that have already been seen.
var filter = Dictionary<String,Int>()
// Keep an element only the first time it appears. `updateValue` returns nil
// when the key was not present, i.e. on the first occurrence. This replaces
// the C-style loop and `++`/`--` operators, which were removed from Swift.
strArray = strArray.filter { filter.updateValue(1, forKey: $0) == nil }
print(strArray)
In order to tell if a character repeats itself, go through the entire array once, incrementing the count of occurrences in a dictionary:
let characters = ["P","Q","R","S","T","P","R","A","T","B","C","P","P","P","P","P","C","P","P","J"]
var counts: [String: Int] = [:]
for character in characters {
counts[character] = (counts[character] ?? 0) + 1
}
let nonRepeatingCharacters = characters.filter({counts[$0] == 1})
// ["Q", "S", "A", "B", "J"]
let firstNonRepeatingCharacter = nonRepeatingCharacters.first!
// "Q"
Here is a simple solution
let inputString = "PQRSTPRATBCPPPPPCPPJ"
/// Returns the first character of `input` that occurs exactly once, as a
/// string, or `""` when every character repeats.
func nonRepeat (_ input: String) -> String {
    // A character is unique when its first and last positions coincide.
    let loneCharacter = input.first { candidate in
        input.firstIndex(of: candidate) == input.lastIndex(of: candidate)
    }
    guard let found = loneCharacter else { return "" }
    return String(found)
}
print (nonRepeat(inputString))
In the above example it would print "Q"
/// Returns the first character of `input` that occurs exactly once.
///
/// Counts every character first and then scans in order. Tracking a single
/// candidate while counting loses earlier unique characters once the candidate
/// is invalidated (for "abac" it would report "c" instead of "b").
///
/// - Parameter input: The string to inspect.
/// - Returns: The first non-repeated character, or `nil` if there is none.
func firstNonRepeatedCharacter(input: String) -> Character?{
    var characterCount : [Character : Int] = [:]
    for character in input {
        characterCount[character, default: 0] += 1
    }
    return input.first { characterCount[$0] == 1 }
}
Without extra loop to find character from characterCount dictionary
Here is the way I have found to detect the first non-repeated character. It removes spaces and punctuation to find the actual letter or number that does not repeat.
extension String {
    /// Returns the string with every character that is not a letter or digit removed.
    func removeNonAlphaNumChars() -> String {
        return components(separatedBy: CharacterSet.alphanumerics.inverted).joined()
    }
    /// The first letter or digit that occurs exactly once, ignoring spaces and
    /// punctuation, or `nil` when there is no such character.
    var firstNonRepeatedCharacter: Character? {
        let characters = Array(removeNonAlphaNumChars())
        guard !characters.isEmpty else { return nil }
        // Count occurrences, then take the first character seen only once.
        var occurrences: [Character: Int] = [:]
        for char in characters {
            occurrences[char, default: 0] += 1
        }
        return characters.first { occurrences[$0] == 1 }
    }
}
I totally wonder why the accepted answer was considered correct. They are using
.first
method of a dictionary and that according to documentation would return a random element in the dictionary and not the first element as a dictionary in swift is not ordered like an array.
please do find below an implementation that works
/// Returns the lowercased form of the first character of `str` that occurs
/// exactly once when case is ignored, or `""` when there is none.
func firstNonRepeatingLetter(_ str: String) -> String{
    let lowered = str.map { $0.lowercased() }
    var tally: [String: Int] = [:]
    for letter in lowered {
        tally[letter, default: 0] += 1
    }
    return lowered.first(where: { tally[$0] == 1 }) ?? ""
}
firstNonRepeatingLetter("moonmen") would return "e".
We can iterate once and keep the letter counts inside a dictionary.
Then, iterate again and return first letter where we see it was encountered once only (or "_" if not found a non-repeating letter):
/// Returns the first character of `s` that occurs exactly once, or "_" when
/// every character repeats (or `s` is empty).
func firstNotRepeatingCharacter(s: String) -> Character {
    var tally: [String: Int] = [:]
    s.forEach { tally[String($0), default: 0] += 1 }
    return s.first(where: { tally[String($0)] == 1 }) ?? "_"
}
OrderedDictionary makes this easy for all Sequences of Hashables, not just Strings:
import struct OrderedCollections.OrderedDictionary
extension Sequence where Element: Hashable {
/// The first element that appears only once in the sequence, or `nil` if there is none.
///
/// Each element is paired with `true` and fed to `OrderedDictionary`; the
/// trailing closure returns `false` for its two arguments.
/// NOTE(review): presumably that closure is the key-uniquing combiner, so a
/// repeated key ends up with `false` while keys keep first-insertion order —
/// confirm against the OrderedCollections documentation.
var firstUniqueElement: Element? {
OrderedDictionary(zip(self, true)) { _, _ in false }
.first(where: \.value)?
.key
}
}
/// `zip` a sequence with a single value, instead of another sequence.
///
/// Returns a lazy sequence of `(element, constant)` tuples, one per element of `sequence`.
public func zip<Sequence: Swift.Sequence, Constant>(
_ sequence: Sequence, _ constant: Constant
) -> LazyMapSequence<
LazySequence<Sequence>.Elements,
(LazySequence<Sequence>.Element, Constant)
> {
sequence.lazy.map { ($0, constant) }
}
/// Returns the first character of `string` that occurs exactly once, or `nil`
/// when every character repeats.
func getFirstUniqueChar(string:String)->Character?{
    let tally = string.reduce(into: [String: Int]()) { partial, character in
        partial[String(character), default: 0] += 1
    }
    return string.first(where: { tally[String($0)] == 1 })
}
print(getFirstUniqueChar(string: string))
import Foundation
import Glibc
var str:String = "aacbbcee" // your input string
var temp:String = ""
// Maps each character to (occurrences - 1): 0 means it was seen exactly once.
var dict:[Character:Int] = [:]
for char in str {
    dict[char] = dict[char].map { $0 + 1 } ?? 0
}
// Characters that occur exactly once (stored value 0).
var arr:[Character] = dict.filter { $0.value == 0 }.map { $0.key }
if arr.isEmpty {
    print("not found")
} else if let first = str.first(where: { arr.contains($0) }) {
    // Scan the input in order so the earliest unique character is reported.
    print(first,"is first")
}
/// Returns the first character of `string` that occurs exactly once.
///
/// Traps when no such character exists, because the result is force-unwrapped.
func firstNonRepeatedChar(string: String) -> Character {
    let characters = Array(string.description)
    let tally = characters.reduce(into: [Character: Int]()) { partial, character in
        partial[character, default: 0] += 1
    }
    let firstNonRepeatedChar = characters.first(where: { tally[$0] == 1 })
    return firstNonRepeatedChar!
}
print(firstNonRepeatedChar(string: "strinstrig"))