MongoDB dataset : pairs not reducing or problem with script - mongodb

I'm new to programming and mongoDB and learning as I go, I'm attempting a mapreduce on a dataset using mongoDB. So far I've converted the csv to json and imported it into a mongoDB using compass.
In compass the data now looks like this :
_id :5bc4e11789f799178470be53
slug :"bitcoin"
symbol :"BTC"
name :"Bitcoin"
date :"2013-04-28"
ranknow :"1"
open :"135.3"
high :"135.98"
low :"132.1"
close :"134.21"
volume :"0"
market :"1500520000"
close_ratio :"0.5438"
spread :"3.88"
I've added an index for each field as follows — is this the right process so that I can run a map-reduce against the data?
db.testmyCrypto.getIndices()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "id",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"slug" : 1
},
"name" : "slug_1",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"symbol" : 2
},
"name" : "symbol_2",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"name" : 3
},
"name" : "name_3",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"data" : 4
},
"name" : "data_4",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"ranknow" : 4
},
"name" : "ranknow_4",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"ranknow" : 5
},
"name" : "ranknow_5",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"open" : 6
},
"name" : "open_6",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"high" : 7
},
"name" : "high_7",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"low" : 8
},
"name" : "low_8",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"volume" : 9
},
"name" : "volume_9",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"market" : 10
},
"name" : "market_10",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"close_ratio" : 11
},
"name" : "close_ratio_11",
"ns" : "myCrypto.testmyCrypto"
},
{
"v" : 2,
"key" : {
"spread" : 13
},
"name" : "spread_13",
"ns" : "myCrypto.testmyCrypto"
}
]
I've dropped the indexes above and now I'm following the map-reduce link. Is this the correct output?
> db.testmyCrypto.mapReduce(function() { emit( this.slug, this.symbol ); }, function(key, values) { return Array.sum( values ) },
... {
... query: { date:"2013-04-28" },
... out: "Date 04-28"
... }
... )
{
"result" : "Date 04-28",
"timeMillis" : 837,
"counts" : {
"input" : 0,
"emit" : 0,
"reduce" : 0,
"output" : 0
},
"ok" : 1
}
I've added the "key value pairs" but I don't seem to be able to get anything from the data.
> db.testmyCrypto.mapReduce(function() { emit( this.slug, this.symbol, this.name, this.date, this.ranknow, this.open, this.high, this.low, this.close, this.volume, this.market, this.close_ratio, this.spread ); }, function(key, values) { return Array.sum( values ) }, { query: { slug:"bitcoin" }, out: "Date 04-28" } )
{
"result" : "Date 04-28",
"timeMillis" : 816,
"counts" : {
"input" : 0,
"emit" : 0,
"reduce" : 0,
"output" : 0
},
"ok" : 1 }
>

If you are trying to sum some values, then they need to be numeric (when importing data into Mongo, try to set the type for the values).
db.collectionName.mapReduce(
function() {
emit(
this.slug,
this.open
)
},
function(keySlug, valueOpen) {
return Array.sum(valueOpen)
},
{
query: { date:"2013-04-28" },
out: "Date 04-28"
}
)
This query will return the sum of the open values for each slug, filtered by date.
P.S. You can do the same thing with aggregation.
If you have any questions, let me know.

Related

MongoDB - how to optimise find query with regex search, with sort

I need to execute the following query:
db.S12_RU.find({"venue.raw":a,"title":/b|c|d|e/}).sort({"year":-1}).skip(X).limit(Y);
where X and Y are numbers.
The number of documents in my collection is:
208915369
Currently, this sort of query takes about 6 minutes to execute.
I have the following indexes:
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_"
},
{
"v" : 2,
"key" : {
"venue.raw" : 1
},
"name" : "venue.raw_1"
},
{
"v" : 2,
"key" : {
"venue.raw" : 1,
"title" : 1,
"year" : -1
},
"name" : "venue.raw_1_title_1_year_-1"
}
]
A standard document looks like this:
{ "_id" : ObjectId("5fc25fc091e3146fb10484af"), "id" : "1967181478", "title" : "Quality of Life of Swedish Women with Fibromyalgia Syndrome, Rheumatoid Arthritis or Systemic Lupus Erythematosus", "authors" : [ { "name" : "Carol S. Burckhardt", "id" : "2052326732" }, { "name" : "Birgitha Archenholtz", "id" : "2800742121" }, { "name" : "Kaisa Mannerkorpi", "id" : "240289002" }, { "name" : "Anders Bjelle", "id" : "2419758571" } ], "venue" : { "raw" : "Journal of Musculoskeletal Pain", "id" : "49327845" }, "year" : 1993, "n_citation" : 31, "page_start" : "199", "page_end" : "207", "doc_type" : "Journal", "publisher" : "Taylor & Francis", "volume" : "1", "issue" : "", "doi" : "10.1300/J094v01n03_20" }
Is there any way to make this query execute in a few seconds?

What index to be added in MongoDB to support $elemMatch query on embedded document

Suppose we have a following document
{
embedded:[
{
email:"abc#abc.com",
active:true
},
{
email:"def#abc.com",
active:false
}]
}
What index should be used to support an $elemMatch query on the email and active fields of the embedded documents?
Update on question :-
db.foo.aggregate([{"$match":{"embedded":{"$elemMatch":{"email":"abc#abc.com","active":true}}}},{"$group":{_id:null,"total":{"$sum":1}}}],{explain:true});
on querying this i am getting following output of explain on aggregate :-
{
"stages" : [
{
"$cursor" : {
"query" : {
"embedded" : {
"$elemMatch" : {
"email" : "abc#abc.com",
"active" : true
}
}
},
"fields" : {
"_id" : 0,
"$noFieldsNeeded" : 1
},
"planError" : "InternalError No plan available to provide stats"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"total" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"ok" : 1
}
I think MongoDB is internally not using an index for this query.
Thanx in advance :)
Update on output of db.foo.stats()
db.foo.stats()
{
"ns" : "test.foo",
"count" : 2,
"size" : 480,
"avgObjSize" : 240,
"storageSize" : 8192,
"numExtents" : 1,
"nindexes" : 3,
"lastExtentSize" : 8192,
"paddingFactor" : 1,
"systemFlags" : 0,
"userFlags" : 1,
"totalIndexSize" : 24528,
"indexSizes" : {
"_id_" : 8176,
"embedded.email_1_embedded.active_1" : 8176,
"name_1" : 8176
},
"ok" : 1
}
db.foo.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.foo"
},
{
"v" : 1,
"key" : {
"embedded.email" : 1,
"embedded.active" : 1
},
"name" : "embedded.email_1_embedded.active_1",
"ns" : "test.foo"
},
{
"v" : 1,
"key" : {
"name" : 1
},
"name" : "name_1",
"ns" : "test.foo"
}
]
Should you decide to stick to that data model and your queries, here's how to create indexes that match the query:
You can simply index "embedded.email", or use a compound key of embedded indexes, i.e. something like
> db.foo.ensureIndex({"embedded.email" : 1 });
- or -
> db.foo.ensureIndex({"embedded.email" : 1, "embedded.active" : 1});
Indexing boolean fields is often not too useful, since their selectivity is low.

mongoDB does not combine 1d and 2d indexes, geo queries scans all documents irrespective of filters applied to limit the number of records

Below is the output from explain for one of the queries:
{
"cursor" : "GeoSearchCursor",
"isMultiKey" : false,
"n" : 0,
"nscannedObjects" : **199564**,
"nscanned" : 199564,
"nscannedObjectsAllPlans" : **199564**,
"nscannedAllPlans" : **199564**,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 1234,
"indexBounds" : {
},
"server" : "MongoDB",
"filterSet" : false
}
This query scans all 199,564 records, whereas with the constraints applied in the query's filter it should only need to scan around a few hundred records.
Pointers would be much appreciated
Adding the query and indexes applied:
Query
{
"isfeatured" : 1 ,
"status" : 1 ,
"isfesturedseq" : 1 ,
"loc_long_lat" : {
"$near" : [ 76.966438 , 11.114906]
} ,
"city_id" : "40" ,
"showTime.0" : { "$exists" : true}}
Indexes
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"loc_long_lat" : "2d"
},
"name" : "loc_long_lat_2d",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"georand" : "2d"
},
"name" : "georand_2d",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"city_id" : 1
},
"name" : "city_id_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"endDatetime" : 1
},
"name" : "endDatetime_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"movieid" : 1
},
"name" : "movieid_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"theaterid" : 1
},
"name" : "theaterid_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"status" : 1
},
"name" : "status_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"isfeatured" : 1
},
"name" : "isfeatured_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"isfesturedseq" : 1
},
"name" : "isfesturedseq_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"is_popular" : 1
},
"name" : "is_popular_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"loc_name" : 1
},
"name" : "loc_name_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"est_city_id" : 1
},
"name" : "est_city_id_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"isfeatured" : 1,
"status" : 1,
"city_id" : 1
},
"name" : "isfeatured_1_status_1_city_id_1",
"ns" : "test_live.movies_theater_map",
"background" : true
},
{
"v" : 1,
"key" : {
"movieid" : 1,
"endDatetime" : 1,
"city_id" : 1,
"status" : 1
},
"name" : "movieid_1_endDatetime_1_city_id_1_status_1",
"ns" : "test_live.movies_theater_map",
"background" : 2
},
{
"v" : 1,
"key" : {
"movieid" : 1,
"endDatetime" : 1,
"city_id" : 1,
"status" : 1,
"georand" : 1
},
"name" : "movieid_1_endDatetime_1_city_id_1_status_1_georand_1",
"ns" : "test_live.movies_theater_map",
"background" : 2
},
{
"v" : 1,
"key" : {
"rand" : 1
},
"name" : "rand_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"isfeatured" : 1,
"city_id" : 1,
"status" : 1
},
"name" : "isfeatured_1_city_id_1_status_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"movieid" : 1,
"city_id" : 1
},
"name" : "movieid_1_city_id_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"loc_long_lat" : 1,
"is_popular" : 1,
"movieid" : 1,
"status" : 1
},
"name" : "loc_long_lat_1_is_popular_1_movieid_1_status_1",
"ns" : "test_live.movies_theater_map"
},
{
"v" : 1,
"key" : {
"status" : 1,
"city_id" : 1,
"theaterid" : 1,
"endDatetime" : 1
},
"name" : "status_1_city_id_1_theaterid_1_endDatetime_1",
"ns" : "test_live.movies_theater_map",
"background" : true
}
The $near operator uses a 2d or 2dsphere index to return documents in order from nearest to furthest. For a 2d index, a max of 100 documents are returned. Your query scanned every document because there were no matching documents and every document, from nearest to furthest, had to be scanned to check if it matched all the conditions.
I would suggest the following to improve the query:
Use the $maxDistance option, which is specified in radians for legacy coordinates, to limit the maximum number of documents scanned.
Use a 2dsphere index, ideally with GeoJSON points instead of legacy coordinates. You can have compound indexes with prefix keys to a geo index with a 2dsphere index, so you could index the query in part on all the other conditions to reduce the number of documents that need to be scanned. What version of MongoDB are you using? You may not have all of these features available with an old version.
Use limit to limit the maximum number of documents scanned. However, when the query has less results than the value of limit, you'll still scan every document.

Issue When I set shard key

I have a collection and I want to shard it.
I get errors when I try to add a shard key.
mongos> sh.shardCollection('IBSng.connection_log', {login_time:1})
But I was shown this error:
Even when I set a compound shard key of the logout_time field together with login_time, the result shows the same error.
{
"proposedKey" : {
"login_time" : 1
},
"curIndexes" : [
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"user_id" : 1
},
"name" : "user_id_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"ras_id" : 1
},
"name" : "ras_id_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"retry_count" : 1
},
"name" : "retry_count_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"credit_used" : 1
},
"name" : "credit_used_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"details.mac" : 1
},
"name" : "details.mac_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"username" : 1
},
"name" : "username_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"type_details.in_bytes" : 1,
"type_details.out_bytes" : 1
},
"name" : "type_details.in_bytes_1_type_details.out_bytes_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"details.kill_reason" : 1
},
"name" : "details.kill_reason_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"details.terminate_cause" : 1
},
"name" : "details.terminate_cause_1",
"ns" : "IBSng.connection_log"
},
{
"v" : 1,
"key" : {
"login_time" : -1,
"logout_time" : -1
},
"name" : "login_time_-1_logout_time_-1",
"ns" : "IBSng.connection_log",
"sparse" : false
}
],
"ok" : 0,
"errmsg" : "please create an index that starts with the shard key before sharding."
}
I'm waiting for your answers.
Add an index for {login_time:1}; this is different from the compound one you have created.

DoctrineMongo near query

I have Symfony 2 + Doctrine MongoDB ODM and I'm trying to use the geospatial query ->near, but it seems that this query doesn't work. It always returns an empty array.
I followed this guide: http://www.doctrine-project.org/docs/mongodb_odm/1.0/en/reference/geospatial-queries.html
And i have this query in my repository:
$this->createQueryBuilder()
->field('coordinates')->near($longitude, $latitude)
->getQuery()
->execute();
Is there a bug? How can I fix it?
My places findAll:
db.Place.find();
{ "_id" : ObjectId("4e4b82df3eee4f7e2c000000"), "coordinates" : { "latitude" : 23.1, "longitude" : 23.23 }, "name" : "Opium Mar" }
{ "_id" : ObjectId("4e5769f43eee4fc002000000"), "name" : "Sutton club", "coordinates" : { "latitude" : 2, "longitude" : 1 } }
{ "_id" : ObjectId("4e5cf2173eee4fc202000008"), "name" : "Scorpia", "coordinates" : { "latitude" : 23, "longitude" : 22 } }
And this is my index:
db.system.indexes.find();
{ "name" : "_id_", "ns" : "kzemos.User", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.Place", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.Party", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.Friend", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.UserParty", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.Invite", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.Photo", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.Group", "key" : { "_id" : 1 }, "v" : 0 }
{ "name" : "_id_", "ns" : "kzemos.places", "key" : { "_id" : 1 }, "v" : 0 }
{ "_id" : ObjectId("4e5deaced3c5c27e84059447"), "ns" : "kzemos.places", "key" : { "loc" : "2d" }, "name" : "loc_", "bits" : 26 }
{ "_id" : ObjectId("4e5dead9d3c5c27e84059448"), "ns" : "kzemos.places", "key" : { "coordinates" : "2d" }, "name" : "coordinates_", "bits" : 26 }
When i use the near query in mongo shell i get this error:
db.Place.find( { coordinates : { $near : [50,50] } } )
error: {
"$err" : "can't find special index: 2d for: { coordinates: { $near: [ 50.0, 50.0 ] } }",
"code" : 13038
}
Thank you!
You need to have a geospatial index on your collection, as explained here: http://www.mongodb.org/display/DOCS/Geospatial+Indexing