How to group already-grouped data in MongoDB - mongodb

I have a MongoDB query over documents with a "Ticket" structure. Each ticket usually contains Tasks objects, and each of them has an Owner: if the ticket is Open, the associated Owner is "OpnPrps.CurAgtNme"; on the other hand, if the ticket is Closed, the associated Owner is "Nms.CloAgt".
This is what my data (JSON) looks like:
{
"result" : [
{
"_id" : NumberLong(3131032306336),
"TicId" : 1147552,
"OrgId" : 729,
"Sts" : "Closed",
"CrtDat" : ISODate("2015-04-23T18:50:46.000Z"),
"CloDat" : ISODate("2015-04-23T19:46:26.000Z"),
"ShtDes" : "Copy of Employment agreement",
"Des" : "EE wants a copy of employment agreement. address was verified.",
"DesSum" : "EE wants a copy of employment agreement. address was verified.",
"Sol" : "ISIGHT TICKET NUMBER:<br><h1>US-04-15-01109</h1>",
"CrtAgtId" : 20444,
"CloAgtId" : 20149,
"CrtApp" : null,
"IsInt" : false,
"HasPrtTsk" : null,
"PexBodId" : "",
"PayGrp" : "",
"RvwDat" : null,
"RclDat" : null,
"IsDynDueDatDef" : false,
"OlaDat" : ISODate("2015-04-25T00:50:50.000Z"),
"SlaDat" : ISODate("2015-04-25T00:50:50.000Z"),
"DynDueDatElaDat" : null,
"ReaTim" : "00:00:00",
"LstUpd" : ISODate("2015-04-23T19:46:26.000Z"),
"VrsOnl" : 2,
"VrsArc" : 1,
"OpnPrps" : null,
"Nms" : {
"Cmp" : "Organization1 US",
"Org" : "Organization1",
"Srv" : "Policies",
"SrvGrp" : "17. Workforce Administration (NGR)",
"Wkg" : "WORKGROUP1",
"Pri" : "",
"Tir" : "T1",
"Src" : "Call",
"CrtAgt" : "Arun ",
"CloAgt" : "Felicia"
},
"Olas" : {
"_id" : 2,
"EntNam" : "ENTITY",
"DueDat" : ISODate("2015-04-25T00:50:50.000Z"),
"AmbDat" : ISODate("2015-04-24T20:50:49.000Z"),
"DueDatElaDat" : null,
"AmbDatElaDat" : null,
"SlaDuration" : 18,
"TotTim" : NumberLong(0),
"TotTimPndEnt" : NumberLong(0),
"TotTimPndEmp" : NumberLong(0),
"RclInSla" : false
},
"Tsks" : {
"_id" : 1,
"Typ" : "Planned",
"CrtDat" : ISODate("2015-04-23T18:50:46.000Z"),
"CloDat" : null,
"LstUpd" : ISODate("2015-04-23T18:50:46.000Z"),
"DueDat" : ISODate("2015-04-25T00:50:50.000Z"),
"TimCplTsk" : 1080,
"DueDatEla" : false,
"AgtOwnId" : null,
"WkgSklId" : 45387,
"EntId" : 2,
"Sts" : "Open",
"PrdTskId" : 201,
"Ttl" : "Provide Navigational Assistance",
"Des" : "Provide Navigational Assistance",
"SrvSklId" : 45792,
"PriSklId" : null,
"TotTim" : 0,
"PtnId" : null,
"PtnTic" : null,
"DepTskId" : null,
"IsAct" : true,
"IsMndOnCloTic" : false,
"Nms" : {
"AgtOwn" : null,
"Wkg" : "CM_T1",
"Ent" : "ENTITY",
"SrvSkl" : "Policies",
"PriSkl" : null,
"Ptn" : null,
"Frm" : ""
},
"AscTicItm" : null,
"FrmId" : null,
"Flds" : []
},
And the query I'm using to group the data looks like:
db.tickets.aggregate([
{$match:{
'Nms.Org': 'Organization1',
'Nms.Cmp':'Company',
'Nms.Wkg':'Workgroup'
}},
{$project:{
_id:0,
'Tsks._id':1,
'Tsks.Sts':1,
'Tsks.DueDat':1,
'Sts':1,
'Nms.Org':1,
'Nms.Cmp':1,
'Nms.Wkg':1,
'Nms.CloAgt':1,
'OpnPrps.CurAgtNme':1,
'OpnPrps.CurEntNme':1,
'Olas.EntNam':1
}},
{$unwind : "$Olas" },
{$unwind : "$Tsks" },
{$match:{$and:[{
'Tsks.DueDat': {$ne: null},
'Olas.EntNam': 'Entity',
'Tsks._id':{$gt:0}
}]}},
{$group:
{_id:{
Org:'$Nms.Org',
Cmp:'$Nms.Cmp',
Wkg:'$Nms.Wkg',
CurEntNme:'$Olas.EntNam',
CurTskId: '$Tsks._id',
DueDate:'$Tsks.DueDat',
Owner1:'$OpnPrps.CurAgtNme',
Owner2:'$Nms.CloAgt'
},
All: {$sum: { $cond:
[
{ $eq: [ '$Tsks.DueDat' , null ] } ,0,1
]}}
},
}
])
With this query I'm able to get every ticket for every owner (whether open or closed) and their associated tasks. My problem is that I want to group all the tickets without taking the status into consideration, something like a second group stage applied to the results I have now; see below:
The table I get with Jasper Studio with this query looks like:
Owner Inventory
--------- --------------
Noemi 1 Owner1:Noemi | Owner2:null
Carl 2 Owner1:null | Owner2:Carl
Darla 2 Owner1:Darla| Owner2:null
Carl 1 Owner1:Carl| Owner2:null
Paola 2 Owner1:null| Owner2:Paola
Noemi 2 Owner1:null | Owner2:Noemi
As you can see, I'm getting repeated owners because the owner name comes from a different field depending on each ticket's status. The table I'm looking for should look like this:
Owner Inventory
--------- --------------
Noemi 3 Owner1:Noemi | Owner2:Noemi
Carl 3 Owner1:Carl | Owner2:Carl
Darla 2 Owner1:Darla| Owner2:null
Paola 2 Owner1:null| Owner2:Paola
So, my problem is that I can't find a way to group these results again to obtain the second table.

Related

Mongodb put Documents array as the same level

I have this array of documents, and I would like to move the fields inside "table" up to the same level as mastil_antena and the other variables. How can I do that with aggregate?
I'm trying with the aggregate $project but I can't get the result.
Example of Data
[ {
"mastil_antena" : "1",
"nro_platf" : "1",
"antmarcmast" : "ANDREW",
"antmodelmast" : "HWXXX6516DSA3M",
"retmarcmast" : "Ericsson",
"retmodelmast" : "ATM200-A20",
"distmast" : "1.50",
"altncramast" : "41.30",
"ORIENTMAG" : "73.00",
"incelecmast" : "RET",
"incmecmast" : "1.00",
"Feedertypemast" : "Fibra Optica",
"longjumpmast" : "5.00",
"longfo" : "100",
"calibrecablefuerza" : "10 mm",
"longcablefuerza" : "65.00",
"modelorruantena" : "32B66A",
"tiltmecfoto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114934746000.jpg",
"tiltmecfoto_fh" : "2017-10-18T05:51:22Z",
"az0foto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017115012727000.jpg",
"az0foto_fh" : "2017-10-18T05:55:21Z",
"azneg60foto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017115016199000.jpg",
"azneg60foto_fh" : "2017-10-18T05:55:36Z",
"azpos60foto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017115020147000.jpg",
"azpos60foto_fh" : "2017-10-18T05:55:49Z",
"etiqantenafoto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114920853000.jpg",
"etiqantenafoto_fh" : "2017-10-18T05:56:01Z",
"tiltelectfoto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114914236000.jpg",
"tiltelectfoto_fh" : "2017-10-18T05:56:13Z",
"idcablefoto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114900279000.jpg",
"idcablefoto_fh" : "2017-10-18T05:56:38Z",
"rrutmafoto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114947279000.jpg",
"rrutmafoto_fh" : "2017-10-18T05:56:49Z",
"etiquetarrufoto" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114954648000.jpg",
"etiquetarrufoto_fh" : "2017-10-18T05:57:02Z",
"rrutmafoto1" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017114959738000.jpg",
"rrutmafoto1_fh" : "2017-10-18T05:57:12Z",
"etiquetarrufoto1" : "https://secure.appenate.com/Files/FormEntry/47929-92cdf219-3128-4903-8324-a81000602b9d171017115005545000.jpg",
"etiquetarrufoto1_fh" : "2017-10-18T05:57:27Z",
"botontorre4" : "sstelcel3",
"table" : { /* put all varibles one level up*/
"tecmast" : "LTE",
"frecmast" : "2100",
"secmast" : "1",
"untitled440" : "Salir"
},
"comentmast" : "",
"longfeedmast" : "",
"numtmasmast" : "",
"otra_marca_antena" : "",
"otro_modelo_antena" : ""
}]
Starting from MongoDB version 3.4 you could use $addFields to do this.
//replace products with what makes sense in your database
db.getCollection('products').aggregate(
[
{ //1 add the properties from subdocument table to documents
$addFields: {
"documents.tecmast" : "documents.0.table.tecmast",
"documents.frecmast" : "documents.0.table.frecmast",
"documents.secmast" : "documents.0.table.secmast",
"documents.untitled440" : "documents.0.table.untitled440"
}
},
{
//(optional) 2 remove the table property from the documents
$project: {"documents.table" : 0}
}
]
)
Step 1: use $addFields to grab properties from table inside documents.table and put them on documents
Step 2: (optional) remove property "table" from documents.
I hope this helps!!!

How to find something from an array in mongo

{
"_id" : ObjectId("586aac4c8231ee0b98458045"),
"store_code" : NumberInt(10800),
"counter_name" : "R.N.Electric",
"address" : "314 khatipura road",
"locality" : "Khatipura Road (Jhotwara)",
"pincode" : NumberInt(302012),
"town" : "JAIPUR",
"gtm_city" : "JAIPUR",
"sales_office" : "URAJ",
"owner_name" : "Rajeev",
"owner_mobile" : "9828024073",
"division_mapping" : [//this contains only 1 element in every doc
{
"dvcode" : "cfc",
"dc" : "trade",
"beatcode" : "govindpura",
"fos" : {
"_id" : ObjectId("586ab8318231ee0b98458843"),
"loginid" : "9928483483",
"name" : "Arpit Gupta",
"division" : [
"cfc",
"iron"
],
"sales_office" : "URAJ", //office
"gtm_city" : "JAIPUR" //city
},
"beat" : {
"_id" : ObjectId("586d372b39f64316b9c3cbd7"),
"division" : {
"_id" : ObjectId("5869f8b639f6430fe4edee2a"),
"clientdvcode" : NumberInt(40),
"code" : "cfc",
"name" : "Cooking & Fabric Care",
"project_code" : "usha-fos",
"client_code" : "usha",
"agent_code" : "v5global"
},
"beatcode" : "govindpura",
"sales_office" : "URAJ",
"gtm_city" : "JAIPUR",
"active" : true,
"agency_code" : "v5global",
"client_code" : "USHA_FOS",
"proj_code" : "usha-fos",
"fos" : {
"_id" : ObjectId("586ab8318231ee0b98458843"),
"loginid" : "9928483483",
"name" : "Arpit Gupta",
"division" : [
"cfc",
"iron"
],
"sales_office" : "URAJ",
"gtm_city" : "JAIPUR"
}
}
}
],
"distributor_mail" : "sunil.todi#yahoo.in",
"project_code" : "usha-fos",
"client_code" : "usha",
"agent_code" : "v5global",
"distributor_name" : "Sundeep Electrical"
}
I have only one element in the division_mapping array, and I want to find those documents whose dc in division_mapping is trade.
I have tried following:
"division_mapping":{$elemMatch:{$eq:{"dc":"trade"}}}})
I don't know what I am doing wrong.
//Maybe I have to unwind the array but is there any other way?
According to MongoDB documentation
The $elemMatch operator matches documents that contain an array
field with at least one element that matches all the specified query
criteria.
Based on the description above, to retrieve only the documents whose dc in division_mapping is trade, please try executing the following query:
db.collection.find({division_mapping:{$elemMatch:{dc:'trade'}}})

using 2 different result sets in mongodb

I'm using groovy with mongodb. I have a result set but need a value from a different grouping of documents. How do I pull that value into the result set I need?
MAIN:Network data
"resource_metadata" : {
"name" : "tapd2e75adf-71",
"parameters" : { },
"fref" : null,
"instance_id" : "9f170531-79d0-48ee-b0f7-9bd2788b1cc5"}
I need the display_name for the network data result set which is contained in the compute data.
CPU data
"resource_id" : "9f170531-79d0-48ee-b0f7-9bd2788b1cc5",
"resource_metadata" : {
"ramdisk_id" : "",
"display_name" : "testinstance0001"}
You can see that resource_id and instance_id hold the same value. I know there is no relationship (join) I can define between them, but I'm reaching out to see if anyone has come across this. I'm using a table model to retrieve data for reporting. A Hashtable has been suggested to me, but I'm not seeing how that would work. Somehow, inside the hasNext loop, I need to include the display_name value in the networking data, so that the report shows the valid name from the compute data instead of only the GUID.
def docs = meter.find(query).sort(sort).limit(50)\
while (docs.hasNext()) { def doc = docs.next()\
model.addRow([ doc.get("counter_name"),doc.get("counter_volume"),doc.get("timestamp"),\
doc.get("resource_metadata").getString("mac"),\
doc.get("resource_metadata").getString("instance_id"),\
doc.get("counter_unit")]
as Object[]);}
Full document:
1st set: the network data measurements I need, which have no name, only an id {resource_metadata.instance_id}:
{
"_id" : ObjectId("528812f8be09a32281e137d0"),
"counter_name" : "network.outgoing.packets",
"user_id" : "4d4e43ec79c5497491b23b13644c2a3b",
"timestamp" : ISODate("2013-11-17T00:51:00Z"),
"resource_metadata" : {
"name" : "tap6baab24e-8f",
"parameters" : { },
"fref" : null,
"instance_id" : "a8727a1d-4661-4565-9c0a-511279024a97",
"instance_type" : "50",
"mac" : "fa:16:3e:a3:bf:fc"
},
"source" : "openstack",
"counter_unit" : "packet",
"counter_volume" : 4611911,
"project_id" : "97dc4ca962b040608e7e707dd03f2574",
"message_id" : "54039238-4f22-11e3-8e68-e4115b99a59d",
"counter_type" : "cumulative"
}
2nd set where I want to grab the name as I get the values {resource_id}:
"_id" : ObjectId("5287bc3ebe09a32281dd2594"),
"counter_name" : "cpu",
"user_id" : "4d4e43ec79c5497491b23b13644c2a3b",
"message_signature" :
"timestamp" : ISODate("2013-11-16T18:40:58Z"),
"resource_id" : "a8727a1d-4661-4565-9c0a-511279024a97",
"resource_metadata" : {
"ramdisk_id" : "",
"display_name" : "vmsapng01",
"name" : "instance-000014d4",
"disk_gb" : "",
"availability_zone" : "",
"kernel_id" : "",
"ephemeral_gb" : "",
"host" : "3746d148a76f4e1a8203d7e2378ef48ccad8a714a47e7481ab37bcb6",
"memory_mb" : "",
"instance_type" : "50",
"vcpus" : "",
"root_gb" : "",
"image_ref" : "869be2c0-9480-4239-97ad-df383c6d09bf",
"architecture" : "",
"os_type" : "",
"reservation_id" : ""
},
"source" : "openstack",
"counter_unit" : "ns",
"counter_volume" : NumberLong("724574640000000"),
"project_id" : "97dc4ca962b040608e7e707dd03f2574",
"message_id" : "a240fa5a-4eee-11e3-8e68-e4115b99a59d",
"counter_type" : "cumulative"
}
This is another collection that contains the same value, but I thought it would be easier to grab it from the same collection:
"_id" : "a8727a1d-4661-4565-9c0a-511279024a97",
"metadata" : {
"ramdisk_id" : "",
"display_name" : "vmsapng01",
"name" : "instance-000014d4",
"disk_gb" : "",
"availability_zone" : "",
"kernel_id" : "",
"ephemeral_gb" : "",
"host" : "3746d148a76f4e1a8203d7e2378ef48ccad8a714a47e7481ab37bcb6",
"memory_mb" : "",
"instance_type" : "50",
"vcpus" : "",
"root_gb" : "",
"image_ref" : "869be2c0-9480-4239-97ad-df383c6d09bf",
"architecture" : "",
"os_type" : "",
"reservation_id" : "",
}
Mike
It looks like these data are in 2 different collections, is this correct?
Would you be able to query CPU data for each "instance_id" ("resource_id")?
Or if this would cause too many queries to the database (looks like you limit to 50...) you could use $in with the list of all "Instance_id"s
http://docs.mongodb.org/manual/reference/operator/query/in/
Either way, you will need to query each collection separately.

Errors while creating a collection in MongoDB

I am new to MongoDB and I am not able to create a collection. When I run the code, the mongo shell responds with the prompt "Display all 169 possibilities? (y or n)". The code is:
db.Lead.insert(
{ LeadID: 1,
MasterAccountID: 100,
LeadName: 'Sarah',
LeadEmailID : 'sarah#hmail.com',
LeadPhoneNumber : '2132155445',
Details : [{ StateID: 1,
TaskID : 1,
Assigned By : 1001,
TimeStamp : '10:00:00',
StatusID : 1 }
]
}
)
Not sure what the issue is. Please help me out with the same.
Regards.
Apart from the space in the key Assigned By (an unquoted key cannot contain a space; write AssignedBy or quote it as 'Assigned By'), everything looks good.
I am able to insert it properly.
> db.Lead.find().pretty()
{
"_id" : ObjectId("517ebe75278e0557fd167eb7"),
"LeadID" : 1,
"MasterAccountID" : 100,
"LeadName" : "Sarah",
"LeadEmailID" : "sarah#hmail.com",
"LeadPhoneNumber" : "2132155445",
"Details" : [
{
"StateID" : 1,
"TaskID" : 1,
"AssignedBy" : 1001,
"TimeStamp" : "10:00:00",
"StatusID" : 1
}
]
}

Migrating from MongoDB to HBase

Hi, I am very new to the HBase database. I downloaded some Twitter data and stored it in MongoDB. Now I need to move that data into HBase to speed up Hadoop processing, but I am not able to design its schema. Here is the Twitter data in JSON format:
{
"_id" : ObjectId("512b71e6e4b02a4322d1c0b0"),
"id" : NumberLong("306044618179506176"),
"source" : "Facebook",
"user" : {
"name" : "Dada Bhagwan",
"location" : "India",
"url" : "http://www.dadabhagwan.org",
"id" : 191724440,
"protected" : false,
"timeZone" : null,
"description" : "Founder of Akram Vignan - Practical Spiritual Science of Self Realization",
"screenName" : "dadabhagwan",
"geoEnabled" : false,
"profileImageURL" : "http://a0.twimg.com/profile_images/1647956820/M_DSC_0034_normal.jpg",
"biggerProfileImageURL" : "http://a0.twimg.com/profile_images/1647956820/M_DSC_0034_bigger.jpg",
"profileImageUrlHttps" : "https://si0.twimg.com/profile_images/1647956820/M_DSC_0034_normal.jpg",
"profileImageURLHttps" : "https://si0.twimg.com/profile_images/1647956820/M_DSC_0034_normal.jpg",
"biggerProfileImageURLHttps" : "https://si0.twimg.com/profile_images/1647956820/M_DSC_0034_bigger.jpg",
"miniProfileImageURLHttps" : "https://si0.twimg.com/profile_images/1647956820/M_DSC_0034_mini.jpg",
"originalProfileImageURLHttps" : "https://si0.twimg.com/profile_images/1647956820/M_DSC_0034.jpg",
"followersCount" : 499,
"profileBackgroundColor" : "EEE4C1",
"profileTextColor" : "333333",
"profileLinkColor" : "990000",
"lang" : "en",
"profileSidebarFillColor" : "FCF9EC",
"profileSidebarBorderColor" : "CBC09A",
"profileUseBackgroundImage" : true,
"showAllInlineMedia" : false,
"friendsCount" : 1,
"favouritesCount" : 0,
"profileBackgroundImageUrl" : "http://a0.twimg.com/profile_background_images/396759326/dadabhagwan-twitter.jpg",
"profileBackgroundImageURL" : "http://a0.twimg.com/profile_background_images/396759326/dadabhagwan-twitter.jpg",
"profileBackgroundImageUrlHttps" : "https://si0.twimg.com/profile_background_images/396759326/dadabhagwan-twitter.jpg",
"profileBannerURL" : null,
"profileBannerRetinaURL" : null,
"profileBannerIPadURL" : null,
"profileBannerIPadRetinaURL" : null,
"miniProfileImageURL" : "http://a0.twimg.com/profile_images/1647956820/M_DSC_0034_mini.jpg",
"originalProfileImageURL" : "http://a0.twimg.com/profile_images/1647956820/M_DSC_0034.jpg",
"utcOffset" : -1,
"contributorsEnabled" : false,
"status" : null,
"createdAt" : NumberLong("1284700143000"),
"profileBannerMobileURL" : null,
"profileBannerMobileRetinaURL" : null,
"profileBackgroundTiled" : false,
"statusesCount" : 1713,
"verified" : false,
"translator" : false,
"listedCount" : 6,
"followRequestSent" : false,
"descriptionURLEntities" : [ ],
"urlentity" : {
"url" : "http://www.dadabhagwan.org",
"start" : 0,
"end" : 26,
"expandedURL" : "http://www.dadabhagwan.org",
"displayURL" : "http://www.dadabhagwan.org"
},
"rateLimitStatus" : null,
"accessLevel" : 0
},
"contributors" : [ ],
"geoLocation" : null,
"place" : null,
"favorited" : false,
"retweet" : false,
"retweetedStatus" : null,
"retweetCount" : 0,
"userMentionEntities" : [ ],
"retweetedByMe" : false,
"currentUserRetweetId" : -1,
"possiblySensitive" : false,
"urlentities" : [
{
"url" : "http://t.co/gR1GohGjaj",
"start" : 113,
"end" : 135,
"expandedURL" : "http://fb.me/2j2HKHJrM",
"displayURL" : "fb.me/2j2HKHJrM"
}
],
"hashtagEntities" : [ ],
"mediaEntities" : [ ],
"truncated" : false,
"inReplyToStatusId" : -1,
"text" : "Spiritual Quote of the Day :\n\n‘I am Chandubhai’ is an illusion itself and from that are \nkarmas charged. When... http://t.co/gR1GohGjaj",
"inReplyToUserId" : -1,
"inReplyToScreenName" : null,
"createdAt" : NumberLong("1361801697000"),
"rateLimitStatus" : null,
"accessLevel" : 0
}
How should I divide this data into columns and column families? I thought of making one "twitter" column family that contains source, geoLocation, place, retweet, etc., and another "user" column family that contains name, location, etc. (the user's data), i.e. a new column family for each inner-level sub-document.
Is this approach correct? And how would I differentiate urlentity in the "user" column family from the one in the "twitter" column family?
And how should I handle keys that contain a list of sub-documents (e.g. urlentities)?
There are many ways to model this in HBase ranging from storing everything in a single column to having a different table for each sub entity with several other tables for "indexing".
Generally speaking, you model the data in HBase based on your read and write access patterns. For example, column families are stored in different files on disk, so a reason to divide data into two column families is if there are a lot of cases where you need data from one and not the other.
There's a good presentation about HBase schema design by Ian Varley from HBaseCon 2012; you can find the slides here and the video here.