Picking an array of hashes from a hash - krl

I have a hash coming back from an XML datasource that looks like this:
{...,
'records' :{
'record' :[
{'availability' :{'$t' :'available'}, ...},
{'availability' :{'$t' :'available'}, ...}
]
}
};
I'd like to get all the record hashes into an array so I can filter() it and do some other operations. However, when I have this statement in my pre block,
raw_records = raw.pick("$..record");
the array that gets returned is an array of two empty strings:
var raw_records = ['', ''];
The odd thing is that I can pick out just availability with expected results:
availability = raw.pick("$..availability.$t");
producing
var availability = ['available', 'available'];
What's wrong with my first pick()?
EDIT: Here is a more complete version that should help with reproducing the problem. It's slightly different, since I'm using the JSON version of the web service now:
global {
datasource hbll <- "https://svc.lib.byu.edu/services/catalog/v1/search/?field=isbn&format=json&terms=";
}
rule new_rule {
select when pageview "amazon.com/.*/?dp/(.*)/" setting (isbn)
pre {
//This is the array with two empty strings...
raw = datasource:hbll(isbn);
myfilter = function(x) { x.pick("availability") eq "available"; };
records = raw.filter(myfilter);
len = records.length();
availability = records.pick("$..availability");
middleman = len > 1 => availability[0] | availability;
available = middleman eq "available" => true | false;
url_list = records.pick("$..url");
url = len > 1 => url_list[0] | url_list;
msg = <<
<p>This book is available for checkout at the BYU Library.</p>
More information
>>;
}
notify("BYU Harold B. Lee Library", msg) with sticky=true;
}

I'm going to need a more complete example. The test app and results I got are below:
ruleset a8x167 {
meta {
name "Pick - Array of Hashes"
description <<
Testing
>>
author "Sam Curren"
logging on
}
dispatch {}
global {
raw = {
'records' :{
'record' :[
{'availability' :{'$t' :'available'}},
{'availability' :{'$t' :'available'}}
]
}
};
}
rule test {
select when pageview ".*" setting ()
pre {
raw_records = raw.pick("$..record");
availability = raw.pick("$..availability.$t");
}
notify("Hello World", "This is a sample rule.");
}
}
And Results:
var raw_records = [{'availability' :{'$t' :'available'}}, {'availability' :{'$t' :'available'}}];
var availability = ['available', 'available'];

Related

How to update nested object (i.e., doc field having object type) in single document in mongodb

I have doc collection which have object type field named price (i.e., see below), I just want to update/insert that field by adding new key value pairs to it.
suppose i have this as collection (in db):
[
{
_id: 1,
price: {
amazon: 102.1,
apple: 500
}
},
....
....
];
Now I want to write an query which either update price's or inserts if not exist in price.
let's suppose these as input data to update/insert with:
var key1 = 'ebay', value1 = 300; // will insert
var key2 = 'amazon', value2 = 100; // will update
assume doc having _id: 1 for now.
Something like $addToSet operator?, Though $addToSet only works for array & i want to work within object).
expected output:
[
{
_id: 1,
price: {
amazon: 100, // updated
apple: 500,
ebay: 300 // inserted
}
},
....
....
];
How can i do/achieve this?
Thanks.
You could construct the update document dynamically to use the dot notation and the $set operator to do the update correctly. Using your example above, you'd want to run the following update operation:
db.collection.update(
{ "_id": 1 },
{
"$set": { "price.ebay": 300, "price.amazon": 100 }
}
)
So, given the data input, you would want to construct an update document like { "price.ebay": 300, "price.amazon": 100 }
With the inputs as you have described
var key1 = 'ebay', value1 = 300; // will insert
var key2 = 'amazon', value2 = 100; // will update
Construct the update object:
var query = { "_id": 1 },
update = {};
update["price."+key1] = value1;
update["price."+key2] = value2;
db.collection.update(query, {"$set": update});

Read data from huge Mongo DB

Scenario:
Collection A has 40 million records and each record has almost 20 fields.
Get 5 (defined)fields from A and change the field name and populate in collection B.
Example:
A
"_id" is the primary key here
{
"_id":123
"id":123
"title":"test"
"summary": "test"
"version":1
"parentid":12
}
B
{
"_id":123
"p$id":123
"p$parentid":12
"p$title":"test"
}
Can someone please suggest a good way to write a code for this scenario?
I wrote the code but it took 5 hrs to complete.
My Code:
config.py:
It has all Mongo DB related details.
Actual code:
from pymongo import MongoClient
import operator
import datetime
print "Start time", datetime.datetime.now()
primary_dict = {}
primary_list = []
secondary_dict = {}
secondary_list = []
missing_id = []
mismatch_id = []
alias_dict = {
"_id": "_id",
"id":"p$id"
"title": "p$title"
"parentid":"p$parentid"
}
def mongo_connect(host, port, db, collection):
client = MongoClient(host, port)
db_obj = client[db]
collection_obj = db_obj[collection]
return collection_obj
def primary():
global primary_list
global primary_dict
global secondary_dict
global secondary_list
global missing_id
primary_collection = mongo_connect(config.mongo_host, config.mongo_port, config.mongo_primary_db, config.mongo_primary_collection)
secondary_collection = mongo_connect(config.mongo_host, config.mongo_port, config.mongo_secondary_db, config.mongo_secondary_collection)
for dict1 in primary_collection.find({},{"_id":1,"title":1}).batch_size(1000):
count = 0
target_id = ''
primary_list = []
secondary_list = []
target_id = dict1['_id']
primary_list.insert(count, dict1)
if (secondary_collection.find_one({"_id":target_id})) is None:
missing_id.append(target_id)
continue
else:
secondary_list.insert(count,secondary_collection.find_one({"_id":target_id}))
compare(primary_list, secondary_list)
def compare(list1, list2):
global alias_dict
global mismatch_id
global missing_id
for l1, l2 in zip(primary_list,secondary_list):
if len(l1) != len(l2):
mismatch_id.append(l1['_id'])
continue
else:
for key, value in l1.items():
if value != l2[alias_dict[key]]:
mismatch_id.append(l1['_id'])
primary()
print "Mismatch id list", mismatch_id
print "Missing Id list", missing_id
print "End time", datetime.datetime.now()
Well you could do this:
db.eval(function(){
db.primary_collection.find({},
{ id: 1, parentid: 1, title: 1 }).forEach(function(doc){
var newDoc = {};
Object.keys(doc).forEach(function(key) {
var newKey = ( key == "_id" ) ? key : "p$" + key;
newDoc[newKey] = doc[key];
});
db.secondary_collection.insert(newDoc);
});
})
Which uses db.eval() to execute the code on the server, which will be as fast as you will get.
But please read the documentation on this as you will be "locking" the database while this operation takes place. And of course you cannot do this across servers if that is your intent.

How do I return JSON Arrays instead of objects using FuelPHP's ORM and Controller_Rest

Controller_Something extends Controller_Rest {
public function get_something() {
$query = Model_Something::query()->related('hasMany')->get();
return $this->response($query);
}
}
Returns:
{
stuff: here,
looks: good,
hasMany: {
151251: {
id: 151251,
other: stuff
}
}
}
I want the relations as arrays:
{
stuff: here,
looks: good,
hasMany: [
{
id: 151251,
other: stuff
}
]
}
This happens because the ORM returns related result arrays with keys corresponding to the record's PKEY, and JSON interprets this as an Object. I want these arrays to go through array_values() or something, so the JSON result will use Array.
Currently I am doing this to "solve" the problem:
$res = Format::forge($result_set)->to_array();
$res['hasMany'] = array_values($res['hasMany']);
return $this->response($res);
But this is only useful to one or two levels, where I know the data will be.
If there are relations that are not guaranteed, I don't what to have to error-check every potential subset of a complex Model.
I just want all the one-to-many arrays to be keyed sequentially instead of by the records PKEY.
$query = Model_Something::find()->related('hasMany');
returns a query object in < 1.6, an exception in 1.6, and null in 1.6.1+. So I assume you do something else that produces that result.
If you want arrays as a result instead of objects, you need to convert the result. You can do that by calling to_array() on a model object, or by using the Format class to convert an array of model objects to an array:
$result = \Format::forge($result)->to_array();
function object_to_array($data){
$new_data2= array();
$keys = array_keys((array)$data);
foreach ($keys as $key)
{
$value = $data[$key];
if(is_numeric($key))
{
$new_data2[] = object_to_array($value);
}elseif(is_string($value) || is_null($value))
{
$new_data2[$key] = $data[$key];
}else
{
$new_data2[$key] = object_to_array($value);
}
}
return $new_data2;
}
$formattedArray = \Format::forge(Model_Product::query()->get())->to_array();
$cleanData=object_to_array($formattedArray);
echo \Format::forge($cleanData)->to_json();
This way checks the array key; if key is number and value is object or array clean key
In short: you can't unless you create a hook in Query:hydrate https://github.com/fuel/orm/blob/1.7/develop/classes/query.php#L1083, or shadowing the Query class with some implementation that returns the very same results except for hydrate.
Programmatically it is possible to be done. Following the model below, but for very deep relationships is not interesting by the complexity of the algorithm.
Model:
class Model_Something extends \Orm\Model
{
...
public function relatedAsArray()
{
$this->relationsAsArray($this->_data_relations);
}
private function relationsAsArray(&$relations)
{
foreach ($relations as $key => $relation) {
foreach ($relation as $fields) {
foreach ($fields as $field) {
if (isset($field->_data_relations)) {
$this->relationsAsArray($field->_data_relations);
}
}
}
if (is_array($relation)) {
$relations[$key] = array_values($relation);
}
}
}
}
Call of method:
$something = Model_Something::find($somethingId, array('related' => array('hasMany', 'hasMany.hasOthers')));
$something->relatedAsArray();
The result was exactly like you wanted.
Result:
{
stuff: here,
looks: good,
hasMany: [
{
id: 151251,
other: stuff,
hasOthers: [
{
id: 12312,
field: other
}, ...
]
}, ...
]
}

mongodb query with group()?

this is my collection structure :
coll{
id:...,
fieldA:{
fieldA1:[
{
...
}
],
fieldA2:[
{
text: "ciao",
},
{
text: "hello",
},
]
}
}
i want to extract all fieldA2 in my collection but if the fieldA2 is in two or more times i want show only one.
i try this
Db.runCommand({distinct:’coll’,key:’fieldA.fieldA2.text’})
but nothing. this return all filedA1 in the collection.
so i try
db.coll.group( {
key: { 'fieldA.fieldA2.text': 1 },
cond: { } },
reduce: function ( curr, result ) { },
initial: { }
} )
but this return an empty array...
How i can do this and see the execution time?? thank u very match...
Since you are running 2.0.4 (I recommend upgrading), you must run this through MR (I think, maybe there is a better way). Something like:
map = function(){
for(i in this.fieldA.fieldA2){
emit(this.fieldA.fieldA2[i].text, 1);
// emit per text value so that this will group unique text values
}
}
reduce = function(values){
// Now lets just do a simple count of how many times that text value was seen
var count = 0;
for (index in values) {
count += values[index];
}
return count;
}
Will then give you a collection of documents whereby _id is the unique text value from fieldA2 and the value field is of the amount of times is appeared i the collection.
Again this is a draft and is not tested.
I think the answer is simpler than a Map/Reduce .. if you just want distinct values plus execution time, the following should work:
var startTime = new Date()
var values = db.coll.distinct('fieldA.fieldA2.text');
var endTime = new Date();
print("Took " + (endTime - startTime) + " ms");
That would result in a values array with a list of distinct fieldA.fieldA2.text values:
[ "ciao", "hello", "yo", "sayonara" ]
And a reported execution time:
Took 2 ms

Detect empty pick() from hash in KRL

I have a bunch of data in a hash and I am picking from it. Sometimes there will be data there to pick and sometimes there won't. What is the best way to know when there was something found by the pick operator and when there wasn't so I can react to that in my code?
The pick operator will take a second optional parameter that will make it so that it always returns the results in an array. This means that if something is picked, the length of the array will be greater than 0, otherwise it will be 0. You can then use that to do what you are wanting to do.
Example code/app taken from http://kynetxappaday.wordpress.com/2011/01/04/day-30-detecting-empty-pick/
ruleset a60x526 {
meta {
name "hash-pick-detect"
description <<
hash-pick-detect
>>
author "Mike Grace"
logging on
}
global {
dataHash = {
"one": {
"name": "Mike"
}, // number
"two": {
"random": 8
}, // number
"three": {
"name": "Alex"
} // number
}; // dataHash
} // global
rule detect_the_pick {
select when pageview ".*"
foreach dataHash setting (key, value)
pre {
userName = value.pick("$.name", true);
length = userName.length();
}
if (length > 0) then {
notify("Key: #{key}","Name: #{userName}<br/>Length: #{length}") with sticky = true;
}
notfired {
raise explicit event empty_pick
with pickedKey = key;
}
}
rule empty_pick_found {
select when explicit empty_pick
pre {
pickedKey = event:param("pickedKey");
results =<<
Key: #{pickedKey}<br/>
doesn't have a name associated with it to pick from
>>; //' fixing syntax highlighting
}
{
notify("An empty pick was detected",results) with sticky = true;
}
}
}