insert_many not working when adding one more case - mongodb

I used the following code to insert tab0011.json into portal_db.acs:
from pymongo import MongoClient
import json
# Connect to a local MongoDB instance (default host/port).
client = MongoClient()
db = client.portal_db
# Drop any previous contents so the load starts from an empty collection.
db.acs.drop()
acs = db.acs
# NOTE(review): the handle from open() is never closed — prefer `with open(...)`.
data_acs = json.load(open('/vagrant/data/tab0011.json', 'r'))
# insert_many expects a list of documents, so tab0011.json must hold a JSON array.
result_acs = acs.insert_many(data_acs)
The code has stored the tab0011.json data correctly. However, I tried the following code to insert tab0011.json into portal_db.acs and tab0007.json into portal_db.tab0007. Both collections were created, but both were empty:
from pymongo import MongoClient
import json
# Same connection and database as the single-collection version above.
client = MongoClient()
db = client.portal_db
# Reset both target collections before loading.
db.acs.drop()
acs = db.acs
db.tab0007.drop()
tab0007 = db.tab0007
# NOTE(review): neither file handle from open() is closed — prefer `with open(...)`.
data_acs = json.load(open('/vagrant/data/tab0011.json', 'r'))
data_tab0007 = json.load(open('/vagrant/data/tab0007.json', 'r'))
# Each insert_many expects its JSON file to contain an array of documents.
result_acs = acs.insert_many(data_acs)
result_tab0007 = tab0007.insert_many(data_tab0007)
Not quite sure why.

If the file extension is .json, I am able to read the data via the methods used in your code and insert it into collections in the same database. I can see the data I used in both of the respective collections.
Maybe you can try doing it this way:
from pymongo import MongoClient
import json

# Connect explicitly to the local MongoDB server.
client = MongoClient(host="localhost", port=27017)
db = client["portal_db"]

# Collection objects are lazy references, so it is safe to grab them before
# dropping; drop_collection just clears any previously stored data.
acs = db.get_collection("acs")
tab0007 = db.get_collection("tab0007")
db.drop_collection("acs")
db.drop_collection("tab0007")

# Load both JSON files; `with` guarantees the file handles are closed.
with open('/vagrant/data/tab0011.json', 'r') as f:
    data_acs = json.load(f)
with open('/vagrant/data/tab0007.json', 'r') as f:
    data_tab0007 = json.load(f)

# insert_many expects a list of documents (a JSON array in each file).
acs_inserts = acs.insert_many(data_acs)
tab_inserts = tab0007.insert_many(data_tab0007)

# Bug fix: the original printed `acs_insert` (undefined) instead of
# `acs_inserts`, which raised a NameError before printing anything.
print(acs_inserts.inserted_ids)
print(tab_inserts.inserted_ids)
The last two lines would print the ObjectIds of the Documents inserted.

Related

Implementation of count(*) in Graphene / Mongo

Good afternoon,
How may I implement a count of the items returned by a MongoDB request and make it available through a GraphQL request? I am currently using MongoDB <=> MongoEngine <=> graphene <=> Flask.
Any help will be welcomed .
Thanks
B.
After a lot of time reading forums and Internet pages, I am now able to retrieve the total count of entities sent back from MongoDB.
Here is the Models.py
from mongoengine import Document
from mongoengine.fields import StringField
class User(Document):
    """Mongoengine model mapped to the ``user`` collection."""
    # Explicit collection name (otherwise mongoengine derives one from the class name).
    meta = {'collection': 'user'}
    first_name = StringField(required=True)
    last_name = StringField(required=True)
Here is the Schema.py
from graphene_mongo import MongoengineObjectType,MongoengineConnectionField
import graphene
from graphene.relay import Node
from Models import User as UserModel
from mongoengine import connect
from flask import Flask
from flask_graphql import GraphQLView
connect(db="graphene-mongo-example",host="127.0.0.1:27017",alias="default")
class Connection(graphene.Connection):
    """Relay connection subclass that exposes a ``totalCount`` field."""
    class Meta:
        # Abstract: concrete connection classes are generated per node type.
        abstract = True
    total_count = graphene.Int()
    def resolve_total_count(self, info):
        # Counts the edges returned in this page of results — not the
        # full collection size, since self.edges holds only this slice.
        return len(self.edges)
class User(MongoengineObjectType):
    """GraphQL object type backed by the mongoengine ``UserModel`` document."""
    class Meta:
        model = UserModel
        # Relay Node interface gives the type a globally unique ID.
        interfaces=(Node,)
        filter_fields = {'first_name': {'startswith', 'contains'}, 'last_name': [""]}
        # Use the custom connection class so queries can request totalCount.
        connection_class = Connection
class Query(graphene.ObjectType):
    # NOTE(review): graphene field names are conventionally lower-case
    # (`node = Node.Field()`); `Node=` also shadows the imported relay Node
    # inside this class body.
    Node=Node.Field()
    all_users = MongoengineConnectionField(User)
# Build the executable schema and serve it through Flask at /graphql;
# graphiql=True also serves the in-browser GraphiQL IDE.
schema = graphene.Schema(query=Query)
app = Flask(__name__)
app.debug = True
app.add_url_rule(
    "/graphql", view_func=GraphQLView.as_view("graphql", schema=schema, graphiql=True,types=[User])
)
if __name__ == "__main__":
    app.run()
To run this example :
python Schema.py

mongoengine connection and multiple databases

I have 2 databases I want to query from, but I only get results from one. I'm using mongoengine with python and graphene (it's my first time). I've exhausted my search and I don't understand how I can resolve this issue. Here is my code:
import graphene
from mongoengine import Document, connect
from mongoengine.context_managers import switch_collection
from mongoengine.fields import (
StringField,
UUIDField,
IntField,
FloatField,
BooleanField,
)
from graphene_mongo import MongoengineObjectType
from mongoengine.connection import disconnect
class UserModel(Document):
    """Mongoengine document stored in the ``users`` collection."""
    meta = {"collection": "users"}
    userID = UUIDField()
    first_name = StringField()
    last_name = StringField()
class Users(MongoengineObjectType):
    """GraphQL object type auto-derived from UserModel."""
    class Meta:
        model = UserModel
class UsersQuery(graphene.ObjectType):
    """Root query exposing the user list and single-user lookup."""
    users = graphene.List(Users)
    user = graphene.Field(Users, userID=graphene.UUID())
    def resolve_users(self, info):
        # NOTE(review): connecting/closing per request, combined with the
        # workouts module doing the same, is presumably why only one of the
        # two endpoints returns data — prefer one connect() per database
        # with a distinct alias at startup (see the db_alias answer below).
        db = connect("users")
        users = list(UserModel.objects.all())
        db.close()
        return users
    def resolve_user(self, info, userID):
        # Returns None when no document matches the given UUID.
        return UserModel.objects(userID=userID).first()
users_schema = graphene.Schema(query=UsersQuery)
import graphene
from mongoengine import Document, connect
from mongoengine.fields import StringField, UUIDField
from graphene_mongo import MongoengineObjectType
from mongoengine.connection import disconnect
class Workout(Document):
    """Mongoengine document stored in the ``workouts`` collection."""
    meta = {"collection": "workouts"}
    workoutID = UUIDField()
    workout_label = StringField()
class Workouts(MongoengineObjectType):
    """GraphQL object type auto-derived from Workout."""
    class Meta:
        model = Workout
class Query(graphene.ObjectType):
    """Root query exposing the workout list and single-workout lookup."""
    workouts = graphene.List(Workouts)
    workout = graphene.Field(Workouts, workoutID=graphene.UUID())
    def resolve_workouts(self, info):
        # NOTE(review): per-resolver connect()/close() — presumably the cause
        # of only one module's queries working at a time; a single aliased
        # connection per database at startup is the safer pattern.
        db = connect("workouts")
        wks = list(Workout.objects.all())
        db.close()
        return wks
    def resolve_workout(self, info, workoutID):
        # Returns None when no document matches the given UUID.
        return Workout.objects(workoutID=workoutID).first()
workouts_schema = graphene.Schema(query=Query)
Now when I have my python server up, mongod running I can hit the /workouts and it will return the array I need. But /users will not return the results.
I get no errors, nothing is wrong with my graphene query.
I can only get one of the queries to work at once.
I have tried using alias, not closing the connections, declaring the connect at the top level even before class UserModel or Workout.
If each of your model is bound to a different database. You should use something like this (cfr docs):
# Register one named connection per database; each document class picks its
# database via the "db_alias" meta key instead of the single default connection.
connect('workouts', alias='dbworkouts') # init a connection to database named "workouts" and register it under alias "dbworkouts"
connect('users', alias='dbusers')
class Workout(Document):
    # Route this document's queries to the "workouts" database.
    meta = {"db_alias": "dbworkouts"}
    workoutID = UUIDField()
    ...
class UserModel(Document):
    # Route this document's queries to the "users" database.
    meta = {"db_alias": "dbusers"}
    userID = UUIDField()
    ...

Get "holes" in dates in MongoDB collection

I have a MongoDB collection that stores data for each hour since 2011.
For example:
{
"dateEntity" : ISODate("2011-01-01T08:00:00Z"),
"price" : 0.3
}
{
"dateEntity" : ISODate("2011-01-01T09:00:00Z"),
"price" : 0.35
}
I'd like to know if there are "holes" in that dates. For example, a missing entry at a hour.
Unfortunately, there is no gaps-marking aggregator in Mongodb.
I checked whether it is possible to write a custom gaps aggregator for MongoDB based on JavaScript functions in map-reduce pipelines, by creating a time raster in the first map stage and then mapping it to its corresponding values; but database reads are discouraged while mapping and reducing, so that would be bad design. Therefore, it is not possible to achieve this with MongoDB's own instruments.
I think, there are two possible solutions.
Solution one: Use a driver like the Java driver
I suggest you could use an idiomatic driver like the Java driver for your Mongodb data and create a raster of hours like in the Test provided.
import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
import com.mongodb.ServerAddress;
import com.mongodb.client.MongoCollection;
import org.bson.Document;
import org.junit.Test;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
public class HourGapsTest {
#Test
public void testHourValues() {
String host = "127.0.0.1:27017";
ServerAddress addr = new ServerAddress(host);
MongoClient mongoClient = new MongoClient(addr);
MongoCollection<Document> collection = mongoClient.getDatabase("sotest").getCollection("hourhole");
LocalDateTime start = LocalDateTime.of(2011, 1, 1, 8, 0, 0);
LocalDateTime end = LocalDateTime.of(2011, 1, 2, 0, 0, 0);
List<LocalDateTime> allHours = new ArrayList<>();
for (LocalDateTime hour = start; hour.isBefore(end); hour = hour.plusHours(1L)) {
allHours.add(hour);
}
List<LocalDateTime> gaps = new ArrayList<>();
for (LocalDateTime hour : allHours) {
BasicDBObject filter = new BasicDBObject("dateEntity", new Date(hour.toInstant(ZoneOffset.UTC).toEpochMilli()));
if (!collection.find(filter).iterator().hasNext()) {
gaps.add(hour);
}
}
gaps.forEach(System.out::println);
}
}
Solution two: Use a timeseries database
However, timeseries databases like Kairosdb provide this functionality. Consider storing these time-value data in a timeseries database.

performance loop while get data from gridFS

I'm using pymongo to get the data from gridFS, the loop while getting this data is really slow.
Is it possible to avoid that loop, or is there any way to do it faster?
from pymongo import MongoClient
from pprint import pprint
import bson
from gridfs import GridFS
import json
import pandas as pd
# Connect to the local MongoDB and open the "MC" GridFS bucket.
client = MongoClient()
client.database_names()
db = client['MC']
fs = GridFS(db, collection="MC")
db.collection_names(include_system_collections=False)
collectionFiles = db['MC.files']
collectionChunk = db['MC.chunks']
# Select the GridFS file metadata documents to process.
files = db['MC.files'].find({"metadata.Feature0": "00011"})
for n in files:
    file_id = n['_id']
    # NOTE(review): this query result is never used, and it targets
    # 'MotorCalculo.chunks' while the rest of the script uses 'MC' — dead
    # code and a likely copy/paste mistake; verify which database is meant.
    chunks = db['MotorCalculo.chunks'].find({"files_id": file_id})
    # fs.get() issues one read per file — this per-document round trip is
    # what makes the loop slow.
    bsondData = (fs.get(file_id).read())
    decData = bsondData.decode()
    jsonData = json.loads(decData)
    # NOTE(review): F1/F2 are overwritten on every iteration, so only the
    # last file's features survive the loop — accumulate them if all are needed.
    F1 = jsonData['Feature1']
    F2 = jsonData['Feature2']
If you have enough RAM, it should be faster to access file groups and not make as many calls to mongo.
You can try something like this:
# Bug fix: pymongo's find() takes its filter as a Python dict (a Mapping),
# not a JSON string — the original built a '{"files_id": ...}' string, which
# pymongo rejects. GridFS file ids are ObjectIds, so convert the id strings.
from bson import ObjectId

batch_file_id = ['#1', '#2', '#3', '#4']  # replace with real 24-char hex id strings
chunks = db['MotorCalculo.chunks'].find(
    {"files_id": {"$in": [ObjectId(fid) for fid in batch_file_id]}}
)
# Equivalent filter, written out:
# {"files_id": {"$in": [ObjectId("#1"), ObjectId("#2"), ObjectId("#3"), ObjectId("#4")]}}
Regards!!

MongoAlchemy query embedded documents

I want to know how to use MongoAlchemy for embedded-document operations.
But I haven't found any documentation about these.
Can anyone give me some help?
Here is demo code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
from flask import Flask
from flaskext.mongoalchemy import MongoAlchemy
# Flask app wired to the 'book' MongoDB database through MongoAlchemy.
app = Flask(__name__)
app.config['DEBUG'] = True
app.config['MONGOALCHEMY_DATABASE'] = 'book'
db = MongoAlchemy(app)
class Comment(db.Document):
    """Comment document; embedded in Book via a DocumentField (see Book.comments)."""
    # db_field maps the attribute to the shorter Mongo key 'uid'.
    user_id = db.StringField(db_field='uid')
    posted = db.StringField(db_field='posted')
class Book(db.Document):
    """Book document carrying a list of embedded Comment sub-documents."""
    title = db.StringField()
    author = db.StringField()
    # Stored in Mongo under the key 'Comments'.
    comments = db.ListField(db.DocumentField(Comment), db_field='Comments')
from mongoalchemy.session import Session
def test():
    # Start from an empty collection, then exercise the save/query round-trip.
    with Session.connect('book') as s:
        s.clear_collection(Book)
    save()
    test_Book()
def save():
    """Persist a single Book carrying two embedded comments."""
    new_book = Book(
        title="Hello World",
        author='me',
        comments=[
            Comment(user_id='user_a', posted='post_a'),
            Comment(user_id='user_b', posted='post_b'),
        ],
    )
    new_book.save()
def test_Book():
    # Fetch any one book by this author (first match only).
    book = Book.query.filter({'author':'me'}).first()
    comment = book.comments[0]
    # Mutate an embedded comment, then persist the parent document.
    comment.posted = str(book.comments[0].posted)+'_new'
    book.save()
    print 'change posted: Book.comments[0].posted:', book.comments[0].posted
    # Append a third comment and persist again.
    comment_c = Comment(user_id='user_c', posted='post_c')
    book.comments.append(comment_c)
    book.save()
    print 'append: Book.comments[2].posted:', book.comments[2].posted
    # $elemMatch matches books containing at least one comment by user_c;
    # note the whole Book document is returned, not just the matching comment.
    query = Book.query.filter({Book.comments:{'$elemMatch':{Comment.user_id:'user_c'}}}).limit(1).first()
    print 'query type:', type(query)
if __name__ == '__main__':
    test()
I want to query data which user_id is "user_c", and just return back one Comment, How can I do that?
Are the methods below recommended by MongoAlchemy? BTW, these methods return the whole document.
#query = Book.query.filter({Book.comments:{'uid':'user_c'}}).limit(1).first()
#query = Book.query_class(Comment).filter(Comment.user_id == 'user_c').limit(1).first()
#query = Book.query.filter({'comments':{'$elemMatch':{'uid':'user_c'}}}).limit(1).first()
#query = Book.query.filter({Book.comments:{'$elemMatch':{Comment.user_id:'user_c'}}}).limit(1).first()
How can I change "user_c" to "user_c_new" in the comment found by the query?
How can I remove one comment which user_id is "user_b"?
Mongo doesn't support returning subdocuments. You can use $elemMatch to filter so that only documents with matching attributes are returned, but you'll have to grab the comments yourself. You could slightly optimize by only returning the comments field as follows:
# Filter to books that contain a comment by user_c, then project only the
# first matching element of the comments list via elem_match.
query = Book.query.filter({Book.comments:{'$elemMatch':{Comment.user_id:'user_c'}}})
query = query.fields(Book.comments.elem_match({Comment.user_id:'user_c'}))
result = query.limit(1).first()
print 'query result:', result.comments
Note that there was a bug with this up until 0.14.3 (which I just released a few minutes ago) which would have caused results.comments not to work.
Another very important note is that the elem_match I'm doing there only returns the first matching element. If you want all matching elements you have to filter them yourself:
# elem_match projection only returns the first match, so when every matching
# comment is wanted, filter the returned comments list in Python instead.
query = Book.query.filter({Book.comments:{'$elemMatch':{Comment.user_id:'user_c'}}})
result = query.limit(1).first()
print 'query result:', [c for c in result.comments if c.user_id == 'user_c']