Creating class instance properties from a dictionary? - class

I'm importing from a CSV and getting data roughly in the format
{ 'Field1' : 3000, 'Field2' : 6000, 'RandomField' : 5000 }
The names of the fields are dynamic. (Well, they're dynamic in that there might be more than Field1 and Field2, but I know Field1 and Field2 are always going to be there.)
I'd like to be able to pass in this dictionary into my class allMyFields so that I can access the above data as properties.
class allMyFields:
# I think I need to include these to allow hinting in Komodo. I think.
self.Field1 = None
self.Field2 = None
def __init__(self,dictionary):
for k,v in dictionary.items():
self.k = v
#of course, this doesn't work. I've ended up doing this instead
#self.data[k] = v
#but it's not the way I want to access the data.
q = { 'Field1' : 3000, 'Field2' : 6000, 'RandomField' : 5000 }
instance = allMyFields(q)
# Ideally I could do this.
print q.Field1
Any suggestions? As far as why -- I'd like to be able to take advantage of code hinting, and importing the data into a dictionary called data as I've been doing doesn't afford me any of that.
(Since the variable names aren't resolved till runtime, I'm still going to have to throw a bone to Komodo - I think the self.Field1 = None should be enough.)
So - how do I do what I want? Or am I barking up a poorly designed, non-python tree?

You can use setattr (be careful though: not every string is a valid attribute name!):
>>> class AllMyFields:
... def __init__(self, dictionary):
... for k, v in dictionary.items():
... setattr(self, k, v)
...
>>> o = AllMyFields({'a': 1, 'b': 2})
>>> o.a
1
Edit: let me explain the difference between the above code and SilentGhost's answer. The above code snippet creates a class of which instance attributes are based on a given dictionary. SilentGhost's code creates a class whose class attributes are based on a given dictionary.
Depending on your specific situation either of these solutions may be more suitable. Do you plan to create one or more class instances? If the answer is one, you may as well skip object creation entirely and only construct the type (and thus go with SilentGhost's answer).

>>> q = { 'Field1' : 3000, 'Field2' : 6000, 'RandomField' : 5000 }
>>> q = type('allMyFields', (object,), q)
>>> q.Field1
3000
The docs for type explain well what's going on here (see its use as a constructor).
edit: in case you need instance variables, the following also works:
>>> a = q() # first instance
>>> a.Field1
3000
>>> a.Field1 = 1
>>> a.Field1
1
>>> q().Field1 # second instance
3000

You can also use dict.update instead of manually looping over items (and if you're looping, iteritems is better).
class allMyFields(object):
    """Expose the entries of a dictionary as instance attributes.

    Field1 and Field2 are declared as class attributes so that they
    always exist (defaulting to None) even when the dictionary passed to
    the constructor does not contain them.
    """

    # note: you cannot (and don't have to) use self here
    Field1 = None
    Field2 = None

    def __init__(self, dictionary):
        """Copy every key/value pair of *dictionary* onto the instance."""
        # Writing into the instance __dict__ shadows the class-level
        # defaults above for any key that is present in the dictionary.
        self.__dict__.update(dictionary)
q = { 'Field1' : 3000, 'Field2' : 6000, 'RandomField' : 5000 }
instance = allMyFields(q)
print instance.Field1 # => 3000
print instance.Field2 # => 6000
print instance.RandomField # => 5000

You could make a subclass of dict which allows attribute lookup for keys:
class AttributeDict(dict):
    """A dict whose keys can also be read as attributes (``d.key``)."""

    def __getattr__(self, name):
        """Return ``self[name]`` when normal attribute lookup has failed.

        Raises:
            AttributeError: if *name* is not a key of the dict.
        """
        # __getattr__ must signal a miss with AttributeError, not KeyError;
        # otherwise hasattr() and getattr(obj, name, default) blow up
        # instead of reporting that the attribute is absent.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)
q = AttributeDict({ 'Field1' : 3000, 'Field2' : 6000, 'RandomField' : 5000 })
print q.Field1
print q.Field2
print q.RandomField
If you try to look up an attribute that dict already has (say keys or get), you'll get that dict class attribute (a method). If the key you ask for doesn't exist on the dict class, then the __getattr__ method will get called and will do your key lookup.

Use setattr for the pretty way. The quick-n-dirty way is to update the instance internal dictionary:
>>> class A(object):
... pass
...
>>> a = A()
>>> a.__dict__.update({"foo": 1, "bar": 2})
>>> a.foo
1
>>> a.bar
2
>>>

Using named tuples (Python 2.6):
>>> from collections import namedtuple
>>> the_dict = {'Field1': 3, 'Field2': 'b', 'foo': 4.9}
>>> fields = ' '.join(the_dict.keys())
>>> AllMyFields = namedtuple('AllMyFields', fields)
>>> instance = AllMyFields(**the_dict)
>>> print instance.Field1, instance.Field2, instance.foo
3 b 4.9

class SomeClass:
def __init__(self,
property1,
property2):
self.property1 = property1
self.property2 = property2
property_dict = {'property1': 'value1',
'property2': 'value2'}
sc = SomeClass(**property_dict)
print(sc.__dict__)

Or you can try this
class AllMyFields:
    """Plain record with three explicitly declared fields."""

    def __init__(self, field1, field2, random_field):
        self.field1 = field1
        self.field2 = field2
        self.random_field = random_field

    @classmethod
    def get_instance(cls, d: dict):
        """Build an instance from a dict whose keys match the __init__ parameters.

        Raises:
            TypeError: if *d* has a key that is not an __init__ parameter,
                or lacks one of the required parameters.
        """
        # The keys are unpacked as keyword arguments, so they must be
        # exactly field1, field2 and random_field.
        return cls(**d)
a = AllMyFields.get_instance({'field1': 3000, 'field2': 6000, 'random_field': 5000})
print(a.field1)

An enhanced version of the dict subclass:
it also works recursively for nested dicts!
class AttributeDict(dict):
    """Dict with attribute-style read access that also works for nested dicts.

    https://stackoverflow.com/a/1639632/6494418
    """

    def __getattr__(self, name):
        """Return ``self[name]``, wrapping a dict value in an AttributeDict.

        The wrapper is a new AttributeDict built from the nested dict, so
        chained access such as ``d.cat.dog`` works.

        Raises:
            AttributeError: if *name* is not a key of the dict.
        """
        # Look the key up once, and report a miss as AttributeError so that
        # hasattr() and getattr(obj, name, default) behave normally.
        try:
            value = self[name]
        except KeyError:
            raise AttributeError(name)
        if isinstance(value, dict):
            return AttributeDict(value)
        return value
if __name__ == '__main__':
d = {"hello": 1, "world": 2, "cat": {"dog": 5}}
d = AttributeDict(d)
print(d.cat)
print(d.cat.dog)
print(d.cat.items())
"""
{'dog': 5}
5
dict_items([('dog', 5)])
"""

If you are open to adding a new library, pydantic is a very efficient solution. It uses Python type annotations to construct objects and validate types. Consider the following code:
from pydantic import BaseModel
class Person(BaseModel):
    """Example pydantic model validated from a plain dict."""

    name: str
    # int rather than str: the sample payload supplies age as the integer
    # 36, and pydantic v2 does not coerce an int into a str field in its
    # default mode, so a str annotation would fail validation.
    age: int
data = {"name": "ahmed", "age": 36}
p = Person(**data)
pydantic: https://pydantic-docs.helpmanual.io/

A simple solution is
field_dict = { 'Field1' : 3000, 'Field2' : 6000, 'RandomField' : 5000 }
# Using dataclasses
from dataclasses import make_dataclass
field_obj = make_dataclass("FieldData", list(field_dict.keys()))(*field_dict.values())
# Using attrs
from attrs import make_class
field_obj = make_class("FieldData", list(field_dict.keys()))(*field_dict.values())

Related

Strict parsing into POJOs with KMongo

When I find documents in my collections and parse them into POJOs, I would like to see exceptions if the MongoDB documents contain additional keys that do not correspond to my POJO.
Can't find a way to configure that.
What I do
data class MyPojo(var a: Int)
val mongoClient = KMongo.createClient(...)
val collection = mongoClient...
val results = collection.aggregate<MyPojo>(...)
and if a result document is
{ "a": 1, "b": 2 }
What I get:
MyPojo(a=1)
I would like to see an exception of some sort, e.g.
kotlinx.serialization.json.JsonDecodingException: Invalid JSON...: Encountered an unknown key b
Does anyone know how to do that?
You have to specify strictMode = true in your JsonConfiguration for example:
install(ContentNegotiation) {
serialization(
contentType = ContentType.Application.Json,
json = Json(
JsonConfiguration(
strictMode = true,
prettyPrint = true
)
)
)
}

PyMongo - Name must be an instance of Str

I'm trying to read and write from a database on MongoDB Atlas and while I can read data from my collections just fine, any attempt to write to a collection causes PyMongo to raise an exception 'name must be an instance of str'.
I'm guessing this is in reference to the MongoClient object but the thing is I am using a connection string. Can anyone help me with what I'm doing wrong?
My code is as follows: (I've got a ton of comments to help me understand better, so please excuse the lack of brevity)
def setattributes(self, rowdict):
""" a function to create a user. Assumes that only a data
dict is provided. strips everything else and updates.
what the data dict contains is your problem.
"""
with UseDatabase(self.dbconfig) as db:
collection = db.database[self.tablename]
locationdict = { #create a corresponding location entry
'email' : rowdict['email'],
'devstate' : 0,
'location' : {
'type': 'Point',
'coordinates' : [ 0, 0 ]
},
'lastseen' : datetime.now()
}
try:
res = db.insertdata(collection, rowdict) #insert user data
except Exception as e:
print("Error adding user to DB : %s" % e)
return False # if you cant insert, return False
try:
loccollection = db.database[self.locationtable]
resloc = db.insertdata(loccollection, locationdict)
except Exception as e: # if the status update failed
db.collection.remove({'email' : rowdict['email']})
#rollback the user insert - atomicity
return False
return True
My Database code is as follows:
class ConnectionError(Exception):
pass
class CredentialsError(Exception):
pass
class UseDatabase:
def __init__(self, config: dict):
self.config = config
def __enter__(self, config = atlas_conn_str):
try:
self.client = MongoClient(config)
self.database = self.client['reviv']
return self
except:
print("Check connection settings")
raise ConnectionError
def __exit__(self, exc_type, exc_value, exc_traceback):
self.client.close()
def insertdata(self, collection, data):
post = data
post_id = self.database[collection].insert_one(post).inserted_id
return post_id
def getdetails(self, collection, emailid):
user = collection.find_one({'email' : emailid}, {'_id' : 0})
return user
In your "setattributes()", you access a pymongo Collection instance by name:
collection = db.database[self.tablename]
Then in "insertdata()" you attempt to do the same thing again, but now "collection" is not a string, it's a Collection instance:
post_id = self.database[collection].insert_one(post).inserted_id
Instead, simply do:
post_id = collection.insert_one(post).inserted_id
By the way, I see that you've written some code to ensure you create and close a MongoClient for each operation. This is unnecessarily complicated, and it will slow down your application dramatically by requiring a new connection for each operation. As the FAQ says, "Create this client once for each process, and reuse it for all operations. It is a common mistake to create a new client for each request, which is very inefficient."
I suggest you delete your UseDatabase class, make the MongoClient a module global variable, and use the MongoClient directly:
client = MongoClient(atlas_conn_str)
db = client[locationtable]
class C:
def setattributes(self, rowdict):
collection = db[self.tablename]
# ... make rowdict as usual, and then:
res = collection.insert_one(rowdict)
This code is simpler and will run much faster.

extend an object with hidden properties

I have the following coffeescript class
class Data
constructor: (data)->
data.prototype.meta = #meta
return data
meta: ->
return { id: 123 }
# this is how I want to work with it, as an example
a = {name: "val"}
x = new Data a
for key, item of x
console.log key, item ## should say `name`, `val` (not meta)
console.log x.meta ## should say `{id:123}
I would like to add the meta property to an existing object, but I do NOT want the meta to come up when I loop on the new object x using a for loop.
If I have failed to explain this properly please let me know I will try and do better :)
You can use Object.defineProperty():
class Data
  # Attaches this class's `meta` value to the given object as a
  # non-enumerable property and returns that same object, so `meta` is
  # readable as `x.meta` but does not show up in `for key, item of x`.
  constructor: (data) ->
    # `@meta` is `this.meta`, i.e. the prototype property declared below.
    Object.defineProperty(data, "meta", { enumerable: false, value: @meta })
    return data

  meta: { id: 123 }
a = {name: "val"}
x = new Data(a)
for key, item of x
console.log key, item ## should say `name`, `val` (not meta)
console.log x.meta ## should say `{id:123}
I ended up using the following...
a = {name: "val"}
a.meta = {id: 123} ## as normal
Object.defineProperty a, "meta", enumerable: false ## this hides it from loops
# Iterate and print `a` itself: it is the object that was just extended.
for key, item of a
  console.log key, item ## should say `name`, `val` (not meta)
console.log a.meta ## should say `{id:123}`

Intersystems cache db - specific ROWSPEC dynamically

I can do this:
Query All() As %Query(CONTAINID = 1, ROWSPEC = "Title:%String,Author:%String")
{
}
But I need to specify ROWSPEC dynamically. I have globals like this:
^glob("title1","author1","xxKZ1") = "val1"
^glob("title1","author1","ssn","xyPO2") = "val2"
^glob("title2","author2","xxII8") = "val3"
^globNext("key1") = "val1"
^globNext("key1","key2") = "val2"
So I need to dynamically create structure of query row. For ^glob I need have something like this:
Query All() As %Query(CONTAINID = 1, ROWSPEC = "Prop1:%String, Prop2:%String, Prop3:%String, Prop4:%String, Val:%String")
{
}
For ^globNext I need something like:
/// Row shape wanted for ^globNext: two string columns.
Query All() As %Query(CONTAINID = 1, ROWSPEC = "Prop1:%String, Prop2:%String")
{
}
Is it possible to reach it?
No, it is not possible, because the number of columns has to be fixed. But since your own code generates the result, you can define a fixed set of columns such as Prop1, Prop2, ..., PropN, fill in as many of them as you need for each row, and leave any remaining trailing columns null. After that, your client-side code can access each column by its name, e.g. Val.

Read data from huge Mongo DB

Scenario:
Collection A has 40 million records and each record has almost 20 fields.
Get 5 (defined)fields from A and change the field name and populate in collection B.
Example:
A
"_id" is the primary key here
{
"_id":123
"id":123
"title":"test"
"summary": "test"
"version":1
"parentid":12
}
B
{
"_id":123
"p$id":123
"p$parentid":12
"p$title":"test"
}
Can someone please suggest a good way to write a code for this scenario?
I wrote the code but it took 5 hrs to complete.
My Code:
config.py:
It has all Mongo DB related details.
Actual code:
from pymongo import MongoClient
import operator
import datetime
print "Start time", datetime.datetime.now()
primary_dict = {}
primary_list = []
secondary_dict = {}
secondary_list = []
missing_id = []
mismatch_id = []
# Maps each field name in a primary-collection document to the name the
# same field has in the corresponding secondary-collection document.
alias_dict = {
    "_id": "_id",
    "id": "p$id",
    "title": "p$title",
    "parentid": "p$parentid",
}
def mongo_connect(host, port, db, collection):
client = MongoClient(host, port)
db_obj = client[db]
collection_obj = db_obj[collection]
return collection_obj
def primary():
global primary_list
global primary_dict
global secondary_dict
global secondary_list
global missing_id
primary_collection = mongo_connect(config.mongo_host, config.mongo_port, config.mongo_primary_db, config.mongo_primary_collection)
secondary_collection = mongo_connect(config.mongo_host, config.mongo_port, config.mongo_secondary_db, config.mongo_secondary_collection)
for dict1 in primary_collection.find({},{"_id":1,"title":1}).batch_size(1000):
count = 0
target_id = ''
primary_list = []
secondary_list = []
target_id = dict1['_id']
primary_list.insert(count, dict1)
if (secondary_collection.find_one({"_id":target_id})) is None:
missing_id.append(target_id)
continue
else:
secondary_list.insert(count,secondary_collection.find_one({"_id":target_id}))
compare(primary_list, secondary_list)
def compare(list1, list2):
global alias_dict
global mismatch_id
global missing_id
for l1, l2 in zip(primary_list,secondary_list):
if len(l1) != len(l2):
mismatch_id.append(l1['_id'])
continue
else:
for key, value in l1.items():
if value != l2[alias_dict[key]]:
mismatch_id.append(l1['_id'])
primary()
print "Mismatch id list", mismatch_id
print "Missing Id list", missing_id
print "End time", datetime.datetime.now()
Well you could do this:
db.eval(function(){
db.primary_collection.find({},
{ id: 1, parentid: 1, title: 1 }).forEach(function(doc){
var newDoc = {};
Object.keys(doc).forEach(function(key) {
var newKey = ( key == "_id" ) ? key : "p$" + key;
newDoc[newKey] = doc[key];
});
db.secondary_collection.insert(newDoc);
});
})
Which uses db.eval() to execute the code on the server, which will be as fast as you will get.
But please read the documentation on this as you will be "locking" the database while this operation takes place. And of course you cannot do this across servers if that is your intent.