Create a mongodb collection index with expireAfterSeconds using pymongo - mongodb

I have a collection in mongodb. In my python program, I have a variable named coll point at it. I want to create an index on a specified field, digestedOn, which will cause expiration of the record after 7776000 seconds.
I know how to create a simple index in python: coll.create_index([( "digestedOn", pymongo.ASCENDING)]). Where do I stick the {"expireAfterSeconds": 7776000} part?
Here's my whole program, I need the last line fixed so that the index is created with expireAfterSeconds.
import pymongo
import ssl
def connect_to_mongo(host, port, ssls, user, password, auth_source):
return pymongo.MongoClient(host, port, ssl=ssls, username=user, ssl_cert_reqs=ssl.CERT_NONE,
password=password, authSource=auth_source,
authMechanism='SCRAM-SHA-1', maxPoolSize=None)
client = connect_to_mongo(host="10.10.10.10", port=27017, ssls=True, user="user",
password="password",auth_source="admin")
db = client['logs']
colnames = db.list_collection_names()
coll = db[colnames[0]]
coll.create_index([( "digestedOn", pymongo.ASCENDING )])

Just pass it as a named parameter:
coll.create_index([( "digestedOn", pymongo.ASCENDING )], expireAfterSeconds=7776000)

Related

how to declare query to fetch data from mongodb?

import pandas as pd
from pymongo import MongoClient
import matplotlib.pyplot as plt
def _connect_mongo(host, port, db):
""" A util for making a connection to mongo
if username and password:
mongo_uri = 'mongodb://%s:%s#%s:%s/%s' % (username, password, host, port, db)
conn = MongoClient(mongo_uri)
else:
"""
conn = MongoClient(host, port)
return conn[db]
def read_mongo(db, collection, host, port, query):
""" Read from Mongo and Store into DataFrame """
# Connect to MongoDB
db = _connect_mongo(host=host, port=port, db=db)
# Make a query to the specific DB and Collection
cursor = db[collection].find(query)
# Expand the cursor and construct the DataFrame
df = pd.DataFrame(list(cursor))
'''
Delete the _id
if no_id:
del df['_id']
'''
return df
#initialization
db = 'twittersmall'
collection='twitterdata'
query='{lang:{$exists: true}}'
host='localhost'
port=27017
var = read_mongo(db, collection, host, port, query)
print var
tweets_by_lang = var['lang'].value_counts()
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Languages', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=15)
ax.set_title('Top 5 languages', fontsize=15, fontweight='bold')
tweets_by_lang[:5].plot(ax=ax, kind='bar', color='red')
In this code, I was trying to fetch those data from mongodb where language field exists(might be null). So in the attribute, query I assigned a filter that will be used in the fetching operation. But the problem is, when I initialize query='{lang:{$exists: true}}', query is of string datatype and query must be a dictionary. When I declare query={lang:{$exists: true}} , it says -> syntax error. Obviously because so far as I know declaration of dictionary is {'key':'value'} . And when I declare like this query={'lang':'{$exists: true}'} it doesn't work because of keyError as there's filed in the database called lang.
So, how to declare this query and pass it in the method?
ps: when I use query={lang:{$exists: true}} in Webstorm terminal, it works, but I am currently working on jupyter notebook that integrates ipython, so that I can create graph, charts using the data from mongodb. I also used pandas for dataframe.

How to add data in right collection in pymongo?

I want to add data in right collection considering by name. The code below is defining well. collection(db,name) returns the name of collection. But when I want to save the collection name via rightCollection = collections(db, name) and inserting it as db.rightCollection.insert({"1" : "Righ collection"}). Pymongo is creating the collection under name rightCollection not Peter. I want to insert data in Peter. Why is it so? Can I resolve it?
from pymongo import MongoClient
def collections(db,name):
if(name is 'Peter'):
return db.Peter
client = MongoClient()
db = client.myDB
name="Peter"
rightCollection = collections(db, name)
db.rightCollection.insert({"1" : "Righ collection"})
Using pymongo 3.2.2, you don't need the collections function, you can just use the collection name directly:
from pymongo import MongoClient
client = MongoClient()
db = client.myDB
db.Peter.insert_one({'1': 'Right collection'})
That should insert the document {'1': 'Right collection} into collection Peter under database myDB. To verify that the data is inserted correctly, you can use the mongo shell:
> use myDB
> db.Peter.find()
{ "_id": ObjectId("57df7a4f98e914c98d540992"), "1": "Right collection" }
Or, if you need the name Peter to be defined in a variable, you can do:
from pymongo import MongoClient
client = MongoClient()
db = client.myDB
coll_name = 'Peter'
db[coll_name].insert_one({'1': 'Right collection'})

iterating over a list of values of single key in a collection in mongodb

i have a collection of financial data stored in mongodb. each company symbol has its data. the question is how to iterate over the collection and change the value of the key which is the symbol company to print out the whole collection and this is my list of companies ['TSLA','TYO','C','LULU','DTV','SHS',' ZNGA'] and this is my cod which return the data of one company:
from pymongo import MongoClient
import csv
host = "localhost"
port = 27017
databaseName = "finance000"
collection_name = "income"
client = MongoClient(host, port)
database = client[databaseName]
collection = database[collection_name]
def finance_data(symbol):
Earnings_BeforeInterestAndTaxes = symbol['statement'[0]EarningsBeforeInterestAndTaxes']['content']
Total_Revenue = symbol['statement'][0]['TotalRevenue']['content']
return Earnings_BeforeInterestAndTaxes,Total_Revenue
i =collection.find({'symbol':'C'})
with open('D:/INCOMEdata.csv', "w") as output:
writer = csv.writer(output, lineterminator='\n')
for key in i :
print finance_data(key)
writer.writerow(finance_data(key))
If I understood you correctly. You want to retrieve the document/data for a given company. The company is denoted by a symbol example 'TYSLA'.
If that is what you need. Here is how you do it (assuming each symbol is unique)
company_data = collection.find({'symbol':'TYSLA'})
The above will return a dictionary. To access an element within the document you just use:
company_data['profit'] #profit is an example of an attribute name. You can use any attribute you like.
Assuming you have multiple companies with the same symbol. If you used the above command. you will get a cursor. to get each company just do a for loop example:
company_data = collection.find({'symbol':'TYSLA'})
Now to loop:
for one in company_date:
print one['profit'] #python 2.7
Now to edit the say for example the profit attribute use $set
collection.update_one({'symbol':'TYSLA'},{'profit':100})
the above will change TYSLA's company profit to 100
Update
Assuming you have a collection with the symbol being any value of ['TSLA','TYO','C','LULU','DTV','SHS',' ZNGA']. to get the data for any of the symbols you use (assuming symbol contains any of the names (only one name)):
you use the $or as:
collection.find({"$or":[ {'symbol':'TSLA},{'symbol':'TYO},... ]},{}) #the ... are the rest of the names you need
The above returns the whole data for a given symbol. To return the specifics Total_Revenue and Earnings_BeforeInterestAndTaxes you use the following:
collection.find({"$or":[ {'symbol':'TSLA},{'symbol':'TYO},... ]},{'Total_Revenue ':1,'Earnings_BeforeInterestAndTaxes ':1, _id:0 }) #if you remove _id it will always be returned
I hope that helps.

autocreate collection and type of creation

I have a simple example:
conn = Connection()
db = conn.livestat
coll = db.cmn1
coll.insert({'test': 1 });
before insert collection autocreate. How to disable this function or add param to autocreate (for example I want to create a capped collection ) ?
You'll have to do it manually. See the doc.
db.createCollection("cmn1", {capped:true, size:100000})
You must create the capped collection before you write to it for the first time. Please use this function:
http://api.mongodb.org/python/current/api/pymongo/database.html#pymongo.database.Database.create_collection

How to get the object id in PyMongo after an insert?

I'm doing a simple insert into Mongo...
db.notes.insert({ title: "title", details: "note details"})
After the note document is inserted, I need to get the object id immediately. The result that comes back from the insert has some basic info regarding connection and errors, but no document and field info.
I found some info about using the update() function with upsert=true, I'm just not sure if that's the right way to go, I have not yet tried it.
One of the cool things about MongoDB is that the ids are generated client side.
This means you don't even have to ask the server what the id was, because you told it what to save in the first place. Using pymongo the return value of an insert will be the object id. Check it out:
>>> import pymongo
>>> collection = pymongo.Connection()['test']['tyler']
>>> _id = collection.insert({"name": "tyler"})
>>> print _id.inserted_id
4f0b2f55096f7622f6000000
The answer from Tyler does not work for me.
Using _id.inserted_id works
>>> import pymongo
>>> collection = pymongo.Connection()['test']['tyler']
>>> _id = collection.insert({"name": "tyler"})
>>> print(_id)
<pymongo.results.InsertOneResult object at 0x0A7EABCD>
>>> print(_id.inserted_id)
5acf02400000000968ba447f
It's better to use insert_one() or insert_many() instead of insert(). Those two are for the newer version. You can use inserted_id to get the id.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
myDB = myclient["myDB"]
userTable = myDB["Users"]
userDict={"name": "tyler"}
_id = userTable.insert_one(userDict).inserted_id
print(_id)
Or
result = userTable.insert_one(userDict)
print(result.inserted_id)
print(result.acknowledged)
If you need to use insert(), you should write like the lines below
_id = userTable.insert(userDict)
print(_id)
Newer PyMongo versions depreciate insert, and instead insert_one or insert_many should be used. These functions return a pymongo.results.InsertOneResult or pymongo.results.InsertManyResult object.
With these objects you can use the .inserted_id and .inserted_ids properties respectively to get the inserted object ids.
See this link for more info on insert_one and insert_many and this link for more info on pymongo.results.InsertOneResult.
updated; removed previous because it wasn't correct
It looks like you can also do it with db.notes.save(...), which returns the _id after it performs the insert.
See for more info:
http://api.mongodb.org/python/current/api/pymongo/collection.html
some_var = db.notes.insert({ title: "title", details: "note details"})
print(some_var.inserted_id)
You just need to assigne it to some variable:
someVar = db.notes.insert({ title: "title", details: "note details"})
To get the ID after an Insert in Python, just do like this:
doc = db.notes.insert({ title: "title", details: "note details"})
return str(doc.inserted_id) # This is to convert the ObjectID (type of doc.inserted_id into string)