Deal with accented character in mongodb - mongodb

I've been struggling to find an ideal way to deal with accented characters.
First, I use these two methods to encode (decode) text before inserting it into (fetching it from) Mongo.
/**
 * Re-expresses a string as its UTF-8 byte sequence, one character per byte.
 *
 * encodeURIComponent() percent-encodes the UTF-8 bytes of `s`; unescape()
 * then turns each %XX sequence back into a single character with that code.
 *
 * @param {string} s - Text to convert.
 * @returns {string} String whose character codes are the UTF-8 bytes of `s`.
 */
function encode_utf8(s) {
  var percentEncoded = encodeURIComponent(s);
  return unescape(percentEncoded);
}
/**
 * Decodes percent-escapes in `s` in two passes: unescape() replaces
 * %XX / %uXXXX sequences with the corresponding characters, then
 * decodeURIComponent() decodes any percent-encoded UTF-8 left in that result.
 *
 * Note: unescape() never produces percent sequences from raw characters, so
 * a string returned by encode_utf8() (e.g. "\u00c3\u00a9") passes through
 * unchanged instead of being turned back into the original text.
 *
 * @param {string} s - Text to decode.
 * @returns {string} The decoded text.
 * @throws {URIError} When the unescaped text contains a malformed % sequence.
 */
function decode_utf8(s) {
  var unescaped = unescape(s);
  return decodeURIComponent(unescaped);
}
This method works fine when I fetch comments from MongoDB and display them (ajax request):
// Fetch the comments for the given login and log the decoded comment text.
$.ajax({
  url: "http://www.mywebsite.com/comments",
  type: "get",
  // Passing an object lets jQuery URL-encode each query-string value.
  data: { key: env.key, login: login },
  dataType: "json",
  success: function (response) {
    console.log(decode_utf8(response.texte)); /* yay works fine */
  },
  error: function (jqXHR, textStatus, errorThrown) {
    // Report the failure instead of silently ignoring it.
    console.error("Fetching comments failed:", textStatus, errorThrown);
  }
});
But when I use the code below (the map function of a MapReduce job), it doesn't handle accents:
String map = "function map() { "
+ "var texte = decodeURIComponent(unescape(this.comment));"
+ "var words = texte.match(/\\w+/g);"
+ "if(words) "
+ for(var i=0; i<words.length; i++)
+ emit(words[i].toLowerCase(), 1);"
+ "}"
Ex: Instead of ùùdzedzed the result is dzedzed.
Any suggestion or workaround would be great.
EDIT :
My mongo content :
{ "_id" : ObjectId("55460441e4b00700737c56cc"), "id" : "c0ab4be5-f4f3-4c73-a7a0-f6863b6ce6511430651969105", "auteur_login" : "testlogin", "auteur_id" : 1, "comment" : "être où règlem ça", "date" : "Sun May 03 13:19:29 CEST 2015" }
{ "_id" : ObjectId("554612f1e4b0953aca0c0be0"), "id" : "aaa52859-5de6-469b-a17f-aa4615db77f71430655729171", "auteur_login" : "testlogin", "auteur_id" : 1, "comment" : "à la", "date" : "Sun May 03 14:22:09 CEST 2015" }
{ "_id" : ObjectId("55461c21e4b0643ea96f7933"), "id" : "ba3ae7c0-39a6-4c77-86ec-d193e39759b71430658081921", "auteur_login" : "testlogin", "auteur_id" : 1, "comment" : "voilà", "date" : "Sun May 03 15:01:21 CEST 2015" }
{ "_id" : ObjectId("5547f5ede4b055c2ffb1a592"), "id" : "a9a1e7d4-c28d-4f9e-98d5-3fe7fba3bb5e1430779373121", "auteur_login" : "testlogin", "auteur_id" : 1, "comment" : "vb vb vb", "date" : "Tue May 05 00:42:53 CEST 2015" }
{ "_id" : ObjectId("5547f5fbe4b055c2ffb1a593"), "id" : "b5ad2b7e-987f-4d32-b5ca-bc06bdc57f611430779387478", "auteur_login" : "testlogin", "auteur_id" : 1, "comment" : "vb vb", "date" : "Tue May 05 00:43:07 CEST 2015" }
{ "_id" : ObjectId("5548b88ee4b029bc67d7a638"), "id" : "451e4e0d-c15a-4657-82c3-7760555b6c811430829198040", "auteur_login" : "testlogin", "auteur_id" : 1, "comment" : "ecrit n'importe quoi", "date" : "Tue May 05 14:33:18 CEST 2015" }
{ "_id" : ObjectId("554b9409e4b004c1b252f9cb"), "id" : "f36c3b49-72b1-48f4-ae92-a7933bc1e2761431016457515", "auteur_login" : "moi12345", "auteur_id" : 9, "comment" : "salut", "date" : "Thu May 07 18:34:17 CEST 2015" }
{ "_id" : ObjectId("554b941be4b004c1b252f9cd"), "id" : "1adc6788-32be-4c3a-bd73-16dad0b4bb741431016475777", "auteur_login" : "moi12345", "auteur_id" : 9, "comment" : "regregerhg", "date" : "Thu May 07 18:34:35 CEST 2015" }
And this is the result :
"sfsdfs": 58.0 update
"ta": 58.0 update
"test1": 58.0 update
"teste": 9.666666666666666 update
"ton": 58.0 update
"tre": 29.0 insert
"try": 58.0 update
"tudiant": 58.0 insert
"tweed": 58.0 update
"une": 58.0 update
"va": 19.333333333333332 update
"vb": 29.0 update
"veux": 19.333333333333332 update
"vient": 58.0 update
"voil": 58.0 insert
Example: for the last line I should be getting voilà instead of voil.

So, essentially, your "encode_utf8" method does a weird way of converting UTF-8 string into 8-bit bytes. Why are you doing that?
([de/en]codeURIComponent() uses UTF-8 encoding, [un]escape() uses ISO-8859-1)
For symmetry, your "decode_utf8(s)" should be
return decodeURIComponent(escape(s));
instead of "unescape".
Note that escape() and unescape() Javascript methods are deprecated
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/escape
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/unescape

Related

Filter (nested) boolean field in Mongo / Meteor

I have a Mongo collection (using Meteor) structured like so:
{
"_id" : "xx",
"name" : "xx",
"handle" : "xx",
"tweets" : [
{
"published" : true,
"tweet_id" : "xx",
"text" : "xx",
"created_at" : "Mon Jul 31 18:18:38 +0000 2017",
"retweet_count" : 0,
"from" : "x",
"from_full_name" : "x",
"from_profile_image" : "x"
},
{
"published" : true,
"tweet_id" : "xx",
"text" : "xx",
"created_at" : "Mon Jul 31 18:18:38 +0000 2017",
"retweet_count" : 0,
"from" : "x",
"from_full_name" : "x",
"from_profile_image" : "x"
},
{
"published" : false,
"tweet_id" : "xx",
"text" : "xx",
"created_at" : "Mon Jul 31 18:18:38 +0000 2017",
"retweet_count" : 0,
"from" : "x",
"from_full_name" : "x",
"from_profile_image" : "x"
},
]
}
I only want to display the published tweets. I am using a template helper to retrieve and filter:
return Tweets.find({
handle:handle,
"tweets.published": true
});
I cannot seem to get the nested filter on 'published' to work. All tweets are displayed, using the above code. I have tried many different permutations of the "tweets.published": true. What is the correct way to filter out the unpublished tweets?
Since the tweets field is an array of objects, not just one object, your method will not work.
First you should use the handle to find the correct document:
return Tweets.find({
handle: handle,
});
This must then be combined with a fields, to select tweets which should be returned.
return Tweets.find({
handle: handle,
}, {
fields: { tweets: { $elemMatch: { published: true } } }
});
The $elemMatch part specifies that the tweet must be published. Note that a $elemMatch projection returns only the first matching array element. More information on the mongo page.
EDIT
If the snippet must run on the client (in your case), you have other options. You can use a server publication with my snippet to only give published tweets to the client.
Alternatively, give the transform option to the find request.
return Tweets.find({
handle: handle
}, {
transform: doc => {
doc.tweets = doc.tweets.filter(tweet => tweet.published);
return doc;
}
});

how to count re tweet number in mongodb

I want to count how many retweets there are in MongoDB. I got the Twitter data through the streaming API.
Here are my different queries, but none of them worked. Please help me.
db.collection.find({"retweet_count":{$ne:0}}).count()
db.collection.find({retweeted:ture}).count()
db.collection.find({retweeted:false}).count()
Related data structure
{
"retweeted" : false,
"in_reply_to_status_id" : null,
"in_reply_to_status_id_str" : null,
"is_quote_status" : false,
"id_str" : "815351057677094912",
"created_at" : "Sun Jan 01 00:16:55 +0000 2017",
"id" : NumberLong("815351057677094912"),
"retweet_count" : 0,
"truncated" : false,
"contributors" : null
}

Querying a list in MongoDB

I have a document that looks like this:
{
"_id" : ObjectId("570fc2381d4899be8a8ec9d9"),
"statuses" : [
{
"created_at" : "Wed Apr 13 09:56:39 +0000 2016",
"id" : 7.20188946337153e+017,
"id_str" : "720188946337153024",
"text" : "RT #BCC_Assicura: #FormulaAuto la #polizza #Auto e #Moto economica BccPordenonese - #BCC #Assicurazioni #Click2go"
},
{
"created_at" : "Wed Apr 13 09:40:13 +0000 2016",
"id" : 7.20184809658708e+017,
"id_str" : "720184809658707970",
"text" : "Auto e moto storiche, vademecum su assicurazione e bollo - \n#autostoriche #bollo #RCauto #ASI #FMI"
}
]}
How do I query for all the records where the variable text contains the string "assicur"?
Thank you!
One possibility would be to use a regex;
> db.test.find({"statuses.text":{$regex: 'assicur'}})
That said, this will not be possible to index in mongodb, so it's probably best done along with other filters that cut the documents down to a small set before doing the string matching.

Incorrect response to mapReduce query in mongo-db

I have 1000 user records in a collection, of which 459 documents have gender male and the remaining ones female.
//document structure
> db.user_details.find().pretty()
{
"_id" : ObjectId("557e610d626754910f0974a4"),
"id" : 0,
"name" : "Leanne Flinn",
"email" : "leanne.flinn#unilogic.com",
"work" : "Unilogic",
"dob" : "Fri Jun 11 1965 20:50:58 GMT+0530 (IST)",
"age" : 5,
"gender" : "female",
"salary" : 35696,
"hobbies" : "Acrobatics,Meditation,Music"
}
{
"_id" : ObjectId("557e610d626754910f0974a5"),
"id" : 1,
"name" : "Edward Young",
"email" : "edward.young#solexis.com",
"work" : "Solexis",
"dob" : "Wed Feb 12 1941 16:45:53 GMT+0530 (IST)",
"age" : 1,
"gender" : "female",
"salary" : 72291,
"hobbies" : "Acrobatics,Meditation,Music"
}
{
"_id" : ObjectId("557e610d626754910f0974a6"),
"id" : 2,
"name" : "Haydee Milligan",
"email" : "haydee.milligan#dalserve.com",
"work" : "Dalserve",
"dob" : "Tue Sep 13 1994 13:45:04 GMT+0530 (IST)",
"age" : 17,
"gender" : "male",
"salary" : 20026,
"hobbies" : "Papier-Mache"
}
{
"_id" : ObjectId("557e610d626754910f0974a7"),
"id" : 3,
"name" : "Lyle Keesee",
"email" : "lyle.keesee#terrasys.com",
"work" : "Terrasys",
"dob" : "Tue Apr 25 1922 13:39:46 GMT+0530 (IST)",
"age" : 79,
"gender" : "female",
"salary" : 48032,
"hobbies" : "Acrobatics,Meditation,Music"
}
{
"_id" : ObjectId("557e610d626754910f0974a8"),
"id" : 4,
"name" : "Shea Mercer",
"email" : "shea.mercer#pancast.com",
"work" : "Pancast",
"dob" : "Mon Apr 08 1935 06:10:30 GMT+0530 (IST)",
"age" : 51,
"gender" : "male",
"salary" : 31511,
"hobbies" : "Acrobatics,Photography,Papier-Mache"
}
Number of users in each gender
> db.user_details.find({gender:'male'}).count()
459
>
> db.user_details.find({gender:'female'}).count()
541
> db.user_details.find({name:{$ne:null}}).count()
1000
> db.user_details.find({age:{$ne:null}}).count()
1000
Map reduce code
// Map step: emits one value per document, keyed by the document's gender.
// The emitted value is an object holding the document's name and age.
mapper = function(){
emit(this.gender, {name:this.name,age:this.age})
}
// Reduce step: returns how many entries `users` holds for this one call.
// Note: the return value is a number, whereas mapper emits {name, age} objects.
reducer = function(gender, users){
var res = 0;
// Adds 1 per element; the element's contents are never read.
users.forEach(function(user){
res = res + 1
})
return res;
}
db.user_details.mapReduce(mapper, reducer, {out: {inline:1}})
Why does the map reduce result count only 102 documents (56 + 46)? It should contain 459 and 541 for male and female respectively, shouldn't it?
// Map reduce result
{
"results" : [
{
"_id" : "female",
"value" : 56
},
{
"_id" : "male",
"value" : 46
}
],
"timeMillis" : 45,
"counts" : {
"input" : 1000,
"emit" : 1000,
"reduce" : 20,
"output" : 2
},
"ok" : 1
}
Note: I know this is not a proper way to use map reduce. Actually, I faced some stranger problems with map reduce; once I get a solution to this question, I could solve those.
Your problem here is that you have missed one of the core concepts of how mapReduce works. The relevant documentation that explains this is found here:
MongoDB can invoke the reduce function more than once for the same key. In this case, the previous output from the reduce function for that key will become one of the input values to the next reduce function invocation for that key.
And then also a bit later:
the type of the return object must be identical to the type of the value emitted by the map function
What those two statements mean is you need to use the exact same signature issued from both the mapper and the reducer functions as the reduce process will indeed get called "multiple times".
This is how mapReduce deals with large data: by not necessarily processing all of the values for a given "key" at once, but doing it in incremental "chunks".
Therefore, if all you want in the output is a "number", then all you "emit" is just a "number" as well:
db.collection.mapReduce(
function() {
emit(this.gender, this.age);
},
function(key,values) {
return Array.sum( values )
},
{ "out": { "inline": 1 } }
)
Or just "count" per type:
db.collection.mapReduce(
function() {
emit(this.gender, 1);
},
function(key,values) {
return Array.sum( values )
},
{ "out": { "inline": 1 } }
)
The point is "you need to put out the same as what you put in", as it will likely "go back in again". So whatever data you want to collect, the output structure for both mapper and reducer must be the same.
This is probably wrong.
users.forEach(function(user){
res = res + 1
})
Try this,
function(gender, users){
return Array.sum( users)
}
There is a mistake in the reduce function.
The MongoDB reduce function can be called multiple times for the same key, so in your reduce code the count is getting overridden.
Also, in the map function you are emitting a document of structure { user, age}, but in the reduce function you are returning the count.
/**
 * Reduce step that totals the per-gender user count and age.
 *
 * MongoDB may call reduce several times for one key and feed an earlier
 * result back in as one of the values, so the returned object must have the
 * same shape as the emitted values and the fields must be summed, not
 * counted. The map function therefore has to emit values shaped like
 * { user: 1, age: this.age }.
 *
 * @param {string} gender - The key being reduced.
 * @param {Array<{user: number, age: number}>} doc - Emitted values and/or
 *   earlier reduce results for this key.
 * @returns {{user: number, age: number}} Total user count and summed age.
 */
var reduce = function(gender, doc) {
  // Declared locally so repeated invocations do not share a global.
  var reducedVal = { user: 0, age: 0 };
  for (var idx = 0; idx < doc.length; idx++) {
    reducedVal.user += doc[idx].user;
    reducedVal.age += doc[idx].age;
  }
  return reducedVal;
};
please check the below link as well:
http://thejackalofjavascript.com/mapreduce-in-mongodb/
This is a proper way to use map reduce(), for display gender-wise count of users
db.yourCollectionName.mapReduce(
function(){
emit(this.gender,1);
},
function(k,v){
return Array.sum(v);
},
{out:"genderCount"}
);
db.genderCount.find();

mongodb group and count, error in sharded environment

I have been looking around and searching the internet, but I couldn't find an answer. Maybe I am wrong with my aggregation, please correct me if I am wrong.
Suppose I have data like this:
mongos> db.data_analytics.find().sort({_id:-1}).limit(1).pretty()
{
"title" : "data weather currency",
"time" : "Sun Dec 08 2013 04:01:51 GMT+0900 (JST)",
"_id" : ObjectId("52a3709f52d744c201000ba6"),
"dataAnalytics" : [
{
"keyItem" : "currency-yen-php",
"currency" : 0.427044,
"from" : "JPY",
"to" : "PHP",
"time" : "Sun Dec 08 2013 04:01:49 GMT+0900 (JST)",
"key" : "currency"
},
{
"keyItem" : "weather-akiruno",
"main-temp" : "5.78",
"main-temp_min" : "2.78",
"main-temp_max" : "7.78",
"weather-main" : "Clouds",
"weather-description" : "scattered clouds",
"time" : "Sun Dec 08 2013 04:01:50 GMT+0900 (JST)",
"key" : "weather"
},
],
"__v" : 0
}
I want to group and count by "$dataAnalytics.currency", but I get an error in a Sharded environment like this
mongos> db.data_analytics.aggregate([{$unwind: "$dataAnalytics"}, {$group:{_id:"$dataAnalytics.currency", c: {$sum:1}}}])
{
"code" : 16390,
"ok" : 0,
"errmsg" : "exception: sharded pipeline failed on shard jeanepaul_set_pi_debian_01: { errmsg: \"exception: invalid operator \"$const\"\", code: 15999, ok: 0.0 }"
}
I have tried doing this in a replica set successfully, but had no luck in a sharded environment. What could be the problem?
I appreciate any help, and thank you for any help in advance!
You're currently grouping by the entire dataAnalytics element, you need to add .currency (or some other individual field) to your _id like this:
db.data_analytics.aggregate([
{$unwind: "$dataAnalytics"},
{$group:{_id:"$dataAnalytics.currency", c: {$sum:1}}}])
Sir Ian, if you can see this, I have solved the problem. I added a $match to my aggregate, and here's my result:
mongos> db.data_analytics.aggregate([{$unwind:"$dataAnalytics"},{$match:{"dataAnalytics.key":"currency"}},{$group:{_id:"$dataAnalytics.currency", total: {$sum:1}}}, {$limit:100}])
{
"result" : [
{
"_id" : 0.428345,
"total" : 3
},
{
"_id" : 0.428517,
"total" : 7
},
{
"_id" : 0.428511,
"total" : 63
},
{
"_id" : 0.42802,
"total" : 7
}
],
"ok" : 1
}
The error seems to be gone. Thank you again for the suggestion!