I tried to ask a question regarding nathandrake's #nathandrake post: How do I calculate a word-word co-occurrence matrix with sklearn?
import pandas as pd
def co_occurance_matrix(input_text, top_words, window_size):
    """Count how often each pair of terms appears within a token window.

    Every document in ``input_text`` is split on single spaces.  For each
    token that equals a row term, the tokens up to ``window_size`` positions
    to its left and to its right are scanned and exact matches of the column
    term are counted.

    Terms are compared against whole tokens.  (A substring test was used
    before, which made the term ``"a"`` match the token ``"cat"``.)  Because
    tokens never contain a space, a multi-word term such as ``"ABC PQR"``
    cannot match any token, so its row and column stay at zero.

    Args:
        input_text: Iterable of documents (strings).
        top_words: Sequence of terms used as both the index and the columns.
        window_size: Number of tokens inspected on each side of a term.

    Returns:
        A square ``pandas.DataFrame`` of counts; the diagonal is always 0.
    """
    co_occur = pd.DataFrame(index=top_words, columns=top_words)
    # Tokenise once instead of once per (row, column) pair.
    documents = [single_essay.split(" ") for single_essay in input_text]

    for nrow, row in enumerate(top_words):
        for ncolm, colm in enumerate(top_words):
            count = 0
            if row != colm:
                for tokens in documents:
                    for index, token in enumerate(tokens):
                        if token != row:
                            continue
                        # Clamp the left edge so a negative start cannot
                        # wrap around to the end of the token list.
                        left = tokens[max(0, index - window_size):index]
                        right = tokens[index + 1:index + window_size + 1]
                        count += left.count(colm) + right.count(colm)
            co_occur.iloc[nrow, ncolm] = count
    return co_occur
My question is: what if my words are not one word but bigrams. For example:
# Example from the question.  Every entry of `words` contains a space, while
# co_occurance_matrix compares terms against tokens produced by split(" "),
# which never contain a space -- so no term can match a token here.
corpus = ['ABC DEF IJK PQR','PQR KLM OPQ','LMN PQR XYZ DEF ABC']
words = ['ABC PQR','PQR DEF']
window_size =100
result = co_occurance_matrix(corpus,words,window_size)
# Bare expression: only displays the frame in an interactive session.
result
After I changed the word list into a bigram list, the co_occurance_matrix function stopped working: every cell shows 0.
I have an error in my Python code about fractions that I cannot solve.
The problem is that I don't know how to create the object,
save its value in a variable, and then call the method that performs the mathematical operation.
Please help me correct it.
from fractions import*
# Values for testing.
# NOTE: at this point `Fraction` is fractions.Fraction from the star import
# above; the Fraction class defined further down is not bound yet, so these
# four objects are standard-library fractions.
f12 = Fraction(1,2)
f44 = Fraction(4,4)
f128 = Fraction(12,8)
f32 = Fraction(3,2)
class Fraction:
    """A fraction stored as an unreduced numerator/denominator pair.

    Arithmetic results are not reduced to lowest terms, e.g.
    ``Fraction(1, 2).sum(Fraction(1, 2))`` is ``4/4``.

    NOTE: this class shadows ``fractions.Fraction`` if that name was
    star-imported earlier in the module.
    """

    def __init__(self, numerator, denominator):
        """Store both parts of the fraction.

        Raises:
            ZeroDivisionError: If ``denominator`` equals 0.
        """
        if denominator == 0:
            raise ZeroDivisionError("sorry you are dividing by zero")
        self.numerator = numerator
        self.denominator = denominator

    def __str__(self):
        """Return the fraction as ``"numerator/denominator"``."""
        return f"{self.numerator}/{self.denominator}"

    def __repr__(self):
        return f"Fraction({self.numerator!r}, {self.denominator!r})"

    def sum(self, other):
        """Return ``self + other`` as a new, unreduced Fraction."""
        # a/b + c/d = (a*d + b*c) / (b*d)
        numerator = (self.numerator * other.denominator
                     + self.denominator * other.numerator)
        denominator = self.denominator * other.denominator
        return Fraction(numerator, denominator)

    def minus(self, other):
        """Return ``self - other`` as a new, unreduced Fraction."""
        # a/b - c/d = (a*d - b*c) / (b*d)
        numerator = (self.numerator * other.denominator
                     - self.denominator * other.numerator)
        denominator = self.denominator * other.denominator
        return Fraction(numerator, denominator)

    def times(self, other):
        """Return ``self * other`` as a new, unreduced Fraction."""
        numerator = self.numerator * other.numerator
        denominator = self.denominator * other.denominator
        return Fraction(numerator, denominator)

    def divid(self, other):
        """Return ``self / other`` as a new, unreduced Fraction.

        Raises:
            ZeroDivisionError: If ``other`` has a zero numerator.
        """
        numerator = self.numerator * other.denominator
        denominator = self.denominator * other.numerator
        return Fraction(numerator, denominator)

    def equals(self, other):
        """Return True when both fractions have the same value."""
        # Cross-multiplication avoids both reduction and float rounding.
        return (self.numerator * other.denominator
                == self.denominator * other.numerator)

    def test_suite(self, other):
        """Print one worked example selected by the values of self/other.

        Each line has the form ``[<expression> == <computed> ] [<expected>]``.
        The fixtures are built locally from this class so every method used
        below exists on them.  The most specific pairs are tested first so
        that no branch is shadowed by the bare ``self == 1/2`` case.
        """
        f12 = Fraction(1, 2)
        f44 = Fraction(4, 4)
        f128 = Fraction(12, 8)
        f32 = Fraction(3, 2)

        if self.equals(f44) and other.equals(f12):
            print(f"[{f44} + {f12} == {f44.sum(f12)} ] [12/8]")
        elif self.equals(f12) and other.equals(f44):
            # The expected value 4/8 is 4/4 - 1/2, so subtract in that order.
            print(f"[{f44} - {f12} == {f44.minus(f12)} ] [4/8]")
        elif self.equals(f12):
            print(f"[{f12} + {f12} == {f12.sum(f12)} ] [4/4]")
        elif self.equals(f128) and other.equals(f32):
            print(f"[{f128} == {f32} == {f128.equals(f32)} ] [True]")
def main():
    """Prompt for two fractions and an operator, then print the result.

    Raises:
        ValueError: If a numerator or denominator is not an integer literal.
        ZeroDivisionError: Raised by ``Fraction`` for a zero denominator.
    """
    print("Wlecome to Fraction Caculator!")
    # input() returns str; convert so the arithmetic and the zero check in
    # Fraction operate on numbers.
    numerator_1 = int(input("Fraction 1 Numerator: "))
    denominator_1 = int(input("Fraction 1 denominator "))
    f12 = Fraction(numerator_1, denominator_1)
    numerator_2 = int(input("Fraction 2 Numerator: "))
    denominator_2 = int(input("Fraction 2 denominator "))
    f44 = Fraction(numerator_2, denominator_2)

    # The operator has to be read with input(); print() always returns None.
    operation = input("Please enter the operation +, - , *, //, %, == ").strip()
    if operation == "+":
        result = f12.sum(f44)
    elif operation == "-":
        result = f12.minus(f44)
    elif operation == "*":
        result = f12.times(f44)
    elif operation == "//":
        result = f12.divid(f44)
    elif operation == "==":
        print(f12.equals(f44))
        return
    else:
        # '%' is offered in the prompt but Fraction has no method for it.
        print(f"Unsupported operation: {operation}")
        return
    # Format from the attributes so the output does not depend on Fraction
    # defining __str__.
    print(f"{result.numerator}/{result.denominator}")
# Run the calculator only when the file is executed as a script.  The module
# name to compare against is exactly "__main__" (no spaces).
if __name__ == "__main__":
    main()
As I'm a beginner in coding, I wanted to try to find the first three repeated numbers in a list. My problem is that when a number is repeated three times, my code breaks.
The usual approaches (remove, pop, and del) don't work, as they delete only one element from the list.
import random
# Short alias for random.randint.
r = random.randint
# Accumulator for the random digit string built further down.
string = ""
def first_repeat(myList):
    """Return the first three repeated values of the sorted input.

    The input is sorted and the sorted list is printed.  The sorted list is
    then scanned once; every value that occurs at least twice is recorded a
    single time, in ascending order.

    Args:
        myList: Iterable of mutually comparable items (e.g. a digit string).

    Returns:
        A 3-tuple with the first three repeated values once three have been
        found; otherwise the list of repeated values found (possibly empty).
    """
    ordered = sorted(myList)
    print(ordered)
    final_numbers = []
    # In a sorted list duplicates are adjacent, so comparing neighbours
    # finds every repeat without any index juggling or popping.
    for previous, current in zip(ordered, ordered[1:]):
        if previous != current:
            continue
        if final_numbers and final_numbers[-1] == current:
            # Third or later occurrence of a value already recorded.
            continue
        final_numbers.append(current)
        if len(final_numbers) == 3:
            return final_numbers[0], final_numbers[1], final_numbers[2]
    return final_numbers
# Append 20 random digits to the module-level string, then analyse it once.
# NOTE(review): the listing lost its indentation; the print is assumed to
# follow the loop rather than run inside it.
for n in range(20):
    string += str(r(0, 9))
print(first_repeat(string))
The expected result should be the first three repeated numbers.
I added some print statements so you can go through your program and find out where the logic is wrong with your code.
import random
# Short alias for random.randint.
r = random.randint
# Accumulator for the random digit string built further down.
string = ""
# Tracing copy of first_repeat: it prints the loop state at each step and
# waits on input() so the control flow can be followed interactively.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below cannot be confirmed from here.
def first_repeat(myList):
# Work on a sorted list built from the input.
myList = sorted(list(myList))
print(myList)
number = 0
final_numbers = []
loop = 0
while loop < 2:
print( 'inside while loop: loop = {}'.format( loop ))
try:
if number == 0:
number += 1
else:
if myList[loop] == myList[loop-1]:
print( 'in -> if myList[loop] == myList[loop-1]' )
final_numbers.append(myList[loop])
# ','.join needs string elements -- presumably the characters of the
# digit string passed in by the caller.
print( 'final_numbers: [{}]'.format( ','.join( final_numbers )))
else:
print( 'in first -> else' )
myList.pop(loop)
myList.pop (loop-1)
number = 0
print( 'myList: [{}]'.format( ','.join( myList ) ))
if loop == 0 :
loop += 1
else:
loop -= 1
if len(final_numbers) > 3:
print( 'returning final numbers' )
print( final_numbers )
return final_numbers[0], final_numbers[1], final_numbers[2]
if len(myList) <=1:
loop += 2
# Bare except: any exception is discarded and the loop is re-entered.
except:
continue
print( 'at end of this loop final numbers is: [{}]'.format( ','.join( final_numbers)))
print( 'press any key to continue loop: ')
# input() returns only after Enter is pressed, despite the "any key" prompt.
input()
return final_numbers
# Append 20 random digits to the module-level string, then trace it once.
# NOTE(review): the listing lost its indentation; the print is assumed to
# follow the loop rather than run inside it.
for n in range(20):
    string += str(r(0, 9))
print(first_repeat(string))
The following is a way to do it that takes advantage of Python's defaultdict:
https://docs.python.org/2/library/collections.html#collections.defaultdict
#import defaultdict to keep track of number counts
from collections import defaultdict
#changed parameter name since you are passing in a string, not a list
def first_repeat(numbers_string):
    """Return the first three digits that are seen a second time.

    Args:
        numbers_string: Iterable of digit characters, e.g. ``"1213243"``.

    Returns:
        A list with three ints, ordered by when their second occurrence was
        met, or ``[]`` when fewer than three different digits repeat.

    Raises:
        ValueError: If an element cannot be converted with ``int``.
    """
    # defaultdict(int) starts every missing key at 0 instead of raising
    # KeyError, so counts can be incremented without a membership test.
    number_count = defaultdict(int)
    # Convert the parameter itself (the old code read an undefined name).
    numbers = [int(s) for s in numbers_string]
    # Repeated numbers in the order their second occurrence was seen.
    first_three_repeats = []
    for number in numbers:
        number_count[number] += 1
        # Record a number exactly once: at its second occurrence.
        if number_count[number] == 2:
            first_three_repeats.append(number)
            if len(first_three_repeats) == 3:
                return first_three_repeats
    # Fewer than three different numbers were repeated.
    return []
# Append 20 random digits to the module-level string, then report the result.
# The function defined for this approach is first_repeat; the name
# findFirstThreeNumbers is not defined anywhere in this listing.
for n in range(20):
    string = string + str(r(0, 9))
print(first_repeat(string))
I want to use the function serial_circuit with three different inputs (1, 2, 3). The function is supposed to accept multiple inputs from the user until
the user hits return, then end the program and sum up all inputs. Instead, the program takes only one input and displays it.
def serial_circuit(num1):
    """Return the total of one value or of an iterable of values.

    Args:
        num1: A single number, or an iterable of numbers to add up.

    Returns:
        ``0 + num1`` for a single number, otherwise the sum of all items
        (``0`` for an empty iterable).

    Raises:
        TypeError: If a value cannot be added to a number.
    """
    try:
        values = iter(num1)
    except TypeError:
        # Not iterable: treat the argument as the only value.
        values = iter((num1,))
    num = 0
    for value in values:
        num += value
    return num
# Show the menu, read the selection, then read one value for that selection
# and print the total returned by serial_circuit.
print("1.Solve for serial resistance: ")
print("2.Solve for serial coils: ")
print("3.Solve for parallel capacitors: ")
choice = input("Enter choice: ")

if choice == '1':
    num1 = float(input("Enter resistor value: "))
    num = serial_circuit(num1)
    print(f"Total resistance = {(num)} ohms")
elif choice == '2':
    num1 = float(input("Enter coil value: "))
    num = serial_circuit(num1)
    print(f"Total inductance = {(num)} henrys")
elif choice == '3':
    num1 = float(input("Enter capacitor value: "))
    num = serial_circuit(num1)
    # Capacitance is printed with six decimal places.
    print(f"Total capacitance = {(num):.6f} farads")
One of your problems is that you have the return statement inside the loop - notice your indentation - during the first iteration of the loop the last thing that will happen within the loop is returning from the serial_circuit function.
Another problem is that you are asking for the input only once - outside of the loop.
Take a look at this solution:
def serial_circuit(text):
    """Sum the numbers typed by the user until an empty line is entered.

    Args:
        text: Prompt shown before each value is read.

    Returns:
        The sum of all values entered, or ``0`` if none were entered.
    """
    num = 0
    while True:
        try:
            received_input = input(text)
        except EOFError:
            # Input stream closed: treat it like an empty line.
            break
        if received_input == "":
            break
        try:
            num += float(received_input)
        except ValueError:
            # A typo should not silently end the entry; ask again instead.
            print(f"'{received_input}' is not a number, please try again.")
    return num
# Show the menu, read the selection, then let serial_circuit collect the
# values for that selection using the given prompt.
print("1.Solve for serial resistance: ")
print("2.Solve for serial coils: ")
print("3.Solve for parallel capacitors: ")
choice = input("Enter choice: ")

if choice == '1':
    num = serial_circuit("Enter resistor value or hit enter to finish: ")
    print(f"Total resistance = {(num)} ohms")
elif choice == '2':
    num = serial_circuit("Enter coil value or hit enter to finish: ")
    print(f"Total inductance = {(num)} henrys")
elif choice == '3':
    num = serial_circuit("Enter capacitor value or hit enter to finish: ")
    # Capacitance is printed with six decimal places.
    print(f"Total capacitance = {(num):.6f} farads")
Google implemented a captcha to block people from accessing the TTS translate API https://translate.google.com/translate_tts?ie=UTF-8&q=test&tl=zh-TW. I was using it in my mobile application. Now, it is not returning anything. How do I get around the captcha?
Add the qualifier '&client=tw-ob' to the end of your query.
https://translate.google.com/translate_tts?ie=UTF-8&q=test&tl=zh-TW&client=tw-ob
This answer no longer works consistently. Your ip address will be blocked by google temporarily if you abuse this too much.
there are 3 main issues:
you must include "client" in your query string (client=t seems to work).
(in case you are trying to retrieve it using AJAX) the Referer of the HTTP request must be https://translate.google.com/
"tk" field changes for every query, and it must be populated with a matching hash:
tk = hash(q, TKK), where q is the text to be TTSed, and TKK is a var in the global scope when you load translate.google.com: (type 'window.TKK' in the console). see the hash function at the bottom of this reply (calcHash).
to summarize:
/**
 * Build a Google Translate TTS request URL for the given text.
 *
 * @param {string} q   Text to be spoken.
 * @param {string} tl  Target language code, e.g. 'it'.
 * @param {string} tkk TKK value in the form "<index>.<key>".
 * @returns {string} The request URL.
 */
function generateGoogleTTSLink(q, tl, tkk) {
  // The token is computed from the raw text, not from its encoded form.
  var tk = calcHash(q, tkk);
  // Percent-encode caller-supplied values so spaces, '&', '#' and non-ASCII
  // characters cannot break or truncate the query string.
  var encodedText = encodeURIComponent(q);
  var encodedLang = encodeURIComponent(tl);
  return `https://translate.google.com/translate_tts?ie=UTF-8&total=1&idx=0&client=t&ttsspeed=1&tl=${encodedLang}&tk=${tk}&q=${encodedText}&textlen=${q.length}`;
}
// Example call: text 'ciao', language 'it', and a sample TKK value.
// The returned URL is not stored or printed here.
generateGoogleTTSLink('ciao', 'it', '410353.1336369826');
// see definition of "calcHash" in the bottom of this comment.
=> to get your hands on a TKK, you can open Google Translate website, then type "TKK" in developer tools' console (e.g.: "410353.1336369826").
NOTE that TKK value changes every hour, and so, old TKKs might get blocked at some point, and refreshing it may be necessary (although so far it seems like old keys can work for a LONG time).
if you DO wish to periodically refresh TKK, it can be automated pretty easily, but not if you're running your code from the browser.
you can find a full NodeJS implementation here:
https://github.com/guyrotem/google-translate-server.
it exposes a minimal TTS API (query, language), and is deployed to a free Heroku server, so you can test it online if you like.
// Folds every 3-character op string in opArray over num, in order.
//   opString[0]: '+' adds the shifted value (result masked with 0xffffffff),
//                anything else XORs it.
//   opString[1]: '+' shifts right unsigned (>>>), anything else shifts left (<<).
//   opString[2]: shift amount as a single hex digit.
function shiftLeftOrRightThenSumOrXor(num, opArray) {
return opArray.reduce((acc, opString) => {
var op1 = opString[1]; // shift direction: '+' ~ >>> (right), otherwise << (left)
var op2 = opString[0]; // combine step: '+' ~ add, otherwise XOR
var xd = opString[2]; // [0-9a-f]
var shiftAmount = hexCharAsNumber(xd);
var mask = (op1 == '+') ? acc >>> shiftAmount : acc << shiftAmount;
// '&' binds looser than '+': this is (acc + mask) & 0xffffffff.
return (op2 == '+') ? (acc + mask & 0xffffffff) : (acc ^ mask);
}, num);
}
// Converts one hex digit character to its numeric value.
// Characters from 'a' upwards map through their char code ('a' -> 10);
// everything else is handed to Number().
function hexCharAsNumber(xd) {
  if (xd >= 'a') {
    return xd.charCodeAt(0) - 87;
  }
  return Number(xd);
}
/**
 * Encode a string as an array of UTF-8 byte values.
 *
 * A valid surrogate pair is combined into one code point and emitted as four
 * bytes; a lone surrogate falls through to the three-byte branch.
 *
 * @param {string} query Text to encode.
 * @returns {number[]} Byte values (0-255), in order.
 */
function transformQuery(query) {
  var bytes = [];
  for (var i = 0; i < query.length; i++) {
    var code = query.charCodeAt(i);
    if (code < 0x80) {
      // 0xxxxxxx
      bytes.push(code);
    } else if (code < 0x800) {
      // 110xxxxx 10xxxxxx
      bytes.push(code >> 6 | 0xC0, code & 0x3F | 0x80);
    } else if (0xD800 == (code & 0xFC00) && i + 1 < query.length &&
               0xDC00 == (query.charCodeAt(i + 1) & 0xFC00)) {
      // High + low surrogate: rebuild the code point above U+FFFF and
      // consume the second code unit.
      code = 0x10000 + ((code & 0x03FF) << 10) + (query.charCodeAt(++i) & 0x03FF);
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -- all four bytes are required;
      // the (code >> 6) byte was previously missing.
      bytes.push(
        code >> 18 | 0xF0,
        code >> 12 & 0x3F | 0x80,
        code >> 6 & 0x3F | 0x80,
        code & 0x3F | 0x80
      );
    } else {
      // 1110xxxx 10xxxxxx 10xxxxxx
      bytes.push(code >> 12 | 0xE0, code >> 6 & 0x3F | 0x80, code & 0x3F | 0x80);
    }
  }
  return bytes;
}
// Maps a negative 32-bit value to its unsigned equivalent, then keeps the
// remainder of a division by one million.
function normalizeHash(encondindRound2) {
  var unsignedValue = encondindRound2;
  if (unsignedValue < 0) {
    unsignedValue = (unsignedValue & 0x7fffffff) + 0x80000000;
  }
  return unsignedValue % 1E6;
}
// Computes the "tk" token for `query` from a TKK string "<index>.<key>".
function calcHash(query, windowTkk) {
  // STEP 1: turn the query into an array of byte values.
  var bytesArray = transformQuery(query);

  // STEP 2: start from the TKK index; add each byte in turn and apply two
  // shift+add/xor steps after every addition.
  var tkkParts = windowTkk.split('.');
  var tkkIndex = Number(tkkParts[0]) || 0;
  var tkkKey = Number(tkkParts[1]) || 0;
  var encondingRound1 = tkkIndex;
  for (var i = 0; i < bytesArray.length; i++) {
    encondingRound1 = shiftLeftOrRightThenSumOrXor(encondingRound1 + bytesArray[i], ['+-a', '^+6']);
  }

  // STEP 3: three more shift+add/xor steps, then XOR with the TKK key.
  var encondingRound2 = shiftLeftOrRightThenSumOrXor(encondingRound1, ['+-3', '^+b', '+-f']) ^ tkkKey;

  // STEP 4: normalize to an unsigned value modulo one million and format.
  var normalizedResult = normalizeHash(encondingRound2);
  return normalizedResult.toString() + "." + (normalizedResult ^ tkkIndex);
}
// Usage example: hash the text 'hola' with a sample TKK value and log it.
var tk = calcHash('hola', '409837.2120040981');
console.log('tk=' + tk);
// OUTPUT: 'tk=70528.480109'
You can also try this format :
pass q= urlencode format of your language
(In JavaScript you can use the encodeURI() function & PHP has the rawurlencode() function)
pass tl = language short name (suppose bangla = bn)
Now try this :
https://translate.google.com.vn/translate_tts?ie=UTF-8&q=%E0%A6%A2%E0%A6%BE%E0%A6%95%E0%A6%BE+&tl=bn&client=tw-ob
First, to avoid captcha, you have to set a proper user-agent like: "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
Then, to avoid being blocked, you must provide a valid token (the "tk" GET parameter) with every single request.
On the web you can find many different scripts that try to calculate the token after a lot of reverse engineering, but every time Google changes the algorithm you are stuck again. It is much easier to retrieve your token by closely observing the requests that the translate page itself makes (with your text in the URL).
You can read the token each time by grepping for "tk=" in the output of this simple PhantomJS script:
"use strict";
// PhantomJS script: loads the translate page for the given text and logs
// every resource request the page makes, so the request URLs can be
// inspected in the output.
var page = require('webpage').create();
var system = require('system');
var args = system.args;
// Exactly one argument (the text) is expected after the script name.
if (args.length != 2) { console.log("usage: "+args[0]+" text"); phantom.exit(1); }
// Forward the page's own console output.
page.onConsoleMessage = function(msg) { console.log(msg); };
// Dump each outgoing request as indented JSON.
page.onResourceRequested = function(request) { console.log('Request ' + JSON.stringify(request, undefined, 4)); };
// NOTE(review): args[1] is appended to the URL fragment without encoding --
// confirm whether text with spaces or special characters needs escaping.
page.open("https://translate.google.it/?hl=it&tab=wT#fr/it/"+args[1], function(status) {
// Exit code 0 when the page loaded, 1 otherwise.
if (status === "success") { phantom.exit(0); }
else { phantom.exit(1); }
});
so in the end you can get your speech with something like:
# Download the speech as MP3.  The trailing backslash joins both lines into a
# single command; without it wget is started without any URL.
wget -U "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0" \
  "http://translate.google.com/translate_tts?ie=UTF-8&tl=it&tk=52269.458629&q=ciao&client=t" -O ciao.mp3
(Tokens are probably time-based, so this link may not work tomorrow.)
I rewrote Guy Rotem's answer in Java, so if you prefer Java over Javascript, feel free to use:
/**
 * Computes the "tk" request token from a query text and a TKK value.
 *
 * All mixing steps use 32-bit integer arithmetic.
 */
public class Hasher {

    /**
     * Applies a sequence of shift-and-combine steps to {@code num}.
     *
     * <p>Each op string has three characters:
     * <ul>
     *   <li>[0] combine step: '+' adds the shifted value, '^' XORs it;</li>
     *   <li>[1] shift direction: '+' is an unsigned right shift, '-' a left shift;</li>
     *   <li>[2] shift amount as one hex digit.</li>
     * </ul>
     *
     * @param num     start value; only its low 32 bits take part in each step
     * @param opArray op strings, applied in order
     * @return the value after the last step ({@code num} itself when opArray is empty)
     */
    public long shiftLeftOrRightThenSumOrXor(long num, String[] opArray) {
        long result = num;
        for (String op : opArray) {
            char combine = op.charAt(0);   // '+' | '^' ~ add | xor
            char direction = op.charAt(1); // '+' | '-' ~ >>> | <<
            char xd = op.charAt(2);        // [0-9a-f]

            assertError(direction == '+' || direction == '-', "Invalid OP: " + direction);
            assertError(combine == '+' || combine == '^', "Invalid OP: " + combine);
            assertError(('0' <= xd && xd <= '9') || ('a' <= xd && xd <= 'f'),
                    "Not an 0x? value: " + xd);

            int shiftAmount = hexCharAsNumber(xd);
            int acc = (int) result;
            int mask = (direction == '+') ? acc >>> shiftAmount : acc << shiftAmount;
            // int arithmetic wraps at 32 bits, so no explicit mask is needed.
            result = (combine == '+') ? acc + mask : acc ^ mask;
        }
        return result;
    }

    /**
     * Reports a failed check on standard error; execution continues.
     *
     * @param cond condition that is expected to hold
     * @param e    message printed when {@code cond} is false
     */
    public void assertError(boolean cond, String e) {
        if (!cond) {
            // The message used to be dropped (an empty line was printed).
            System.err.println(e);
        }
    }

    /** Converts one hex digit character ('0'-'9', 'a'-'f') to its value. */
    public int hexCharAsNumber(char xd) {
        return (xd >= 'a') ? xd - 87 : Character.getNumericValue(xd);
    }

    /**
     * Encodes {@code query} as UTF-8 and returns the byte values as ints.
     *
     * <p>A valid surrogate pair is combined into one code point and emitted
     * as four bytes; a lone surrogate falls through to the three-byte branch.
     *
     * @param query text to encode
     * @return byte values (0-255) in order; empty for an empty query
     */
    public int[] transformQuery(String query) {
        // One UTF-16 code unit yields at most 3 bytes (a surrogate pair, two
        // units, yields 4), so 3 * length is always large enough.
        int[] e = new int[query.length() * 3];
        int f = 0;
        for (int g = 0; g < query.length(); g++) {
            int l = query.charAt(g);
            if (l < 128) {
                e[f++] = l;                     // 0xxxxxxx
            } else if (l < 2048) {
                e[f++] = l >> 6 | 0xC0;         // 110xxxxx
                e[f++] = l & 0x3F | 0x80;       // 10xxxxxx
            } else if (0xD800 == (l & 0xFC00)
                    && g + 1 < query.length()
                    && 0xDC00 == (query.charAt(g + 1) & 0xFC00)) {
                // Rebuild the code point above U+FFFF and consume the low surrogate.
                l = (1 << 16) + ((l & 0x03FF) << 10) + (query.charAt(++g) & 0x03FF);
                e[f++] = l >> 18 | 0xF0;        // 11110xxx
                e[f++] = l >> 12 & 0x3F | 0x80; // 10xxxxxx
                e[f++] = l >> 6 & 0x3F | 0x80;  // 10xxxxxx (was missing)
                e[f++] = l & 0x3F | 0x80;       // 10xxxxxx
            } else {
                e[f++] = l >> 12 | 0xE0;        // 1110xxxx
                e[f++] = l >> 6 & 0x3F | 0x80;  // 10xxxxxx
                e[f++] = l & 0x3F | 0x80;       // 10xxxxxx
            }
        }
        return java.util.Arrays.copyOf(e, f);
    }

    /**
     * Maps a negative 32-bit value to its unsigned equivalent and returns the
     * remainder of a division by one million.
     */
    public long normalizeHash(long encondindRound2) {
        if (encondindRound2 < 0) {
            encondindRound2 = (encondindRound2 & 0x7fffffff) + 0x80000000L;
        }
        return encondindRound2 % 1_000_000;
    }

    /**
     * Computes the token for {@code query}.
     *
     * <p>Example: query {@code "hola"}, windowTkk {@code "409837.2120040981"}
     * gives {@code "70528.480109"}.
     *
     * @param query     text the token is computed for
     * @param windowTkk TKK value in the form {@code "<index>.<key>"}; a part
     *                  that is missing or not numeric is treated as 0
     * @return the token, formatted {@code "<a>.<b>"}
     */
    public String calcHash(String query, String windowTkk) {
        // STEP 1: turn the query into UTF-8 byte values.
        int[] bytesArray = transformQuery(query);

        // STEP 2: start from the TKK index; add each byte in turn and apply
        // two shift+add/xor steps after every addition.
        String[] d = windowTkk.split("\\.");
        int tkkIndex = 0;
        try {
            tkkIndex = Integer.parseInt(d[0]);
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            e.printStackTrace();
        }
        long tkkKey = 0;
        try {
            tkkKey = Long.parseLong(d[1]);
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            e.printStackTrace();
        }

        long encondingRound1 = tkkIndex;
        for (int current : bytesArray) {
            encondingRound1 = shiftLeftOrRightThenSumOrXor(encondingRound1 + current,
                    new String[] {"+-a", "^+6"});
        }

        // STEP 3: three more shift+add/xor steps, then XOR with the TKK key.
        long encondingRound2 = ((int) shiftLeftOrRightThenSumOrXor(encondingRound1,
                new String[] {"+-3", "^+b", "+-f"})) ^ ((int) tkkKey);

        // STEP 4: normalize to an unsigned value modulo one million and format.
        long normalizedResult = normalizeHash(encondingRound2);
        return String.valueOf(normalizedResult) + "."
                + (((int) normalizedResult) ^ tkkIndex);
    }
}