Do I need to close connection in mongodb?
Solution 1
There's no need to close a Connection
instance, it will clean up after itself when Python garbage collects it.
You should use MongoClient
instead of Connection
; Connection
is deprecated. To take advantage of connection pooling, you could create one MongoClient
that lasts for the entire life of your process.
PyMongo represents documents as dicts. Why are you encoding each dict it gives you as JSON, then decoding it again? It may be more efficient to modify the objects directly.
That said, I agree with user3683180 that the real problem--the reason MongoDB is taking so much CPU--is in your schema or index design, not in your Python code.
Solution 2
Given the name of your database 'indexer', and the 'unique' property which requires an index, I'm thinking your CPU usage might have nothing to do with this code.
Try using mongostat and mongotop to see what mongo is spending its time doing. I'm thinking you'll find it's spending time crunching data and that your code is just fine.
bor
Updated on August 01, 2022 — Comments
-
bor almost 2 years
I am using python and django to make web based application. I am using mongodb as backend database. I have a base class named MongoConnection that uses pymongo layers to communicate with the mongodb. I am very fine with this layer, as it seperates database from the business layer for me. My custom MongoConnenction class is as follows:-
#!/usr/bin/env python
# encoding: utf-8
"""Thin data-access layer around pymongo, isolating the business layer
from the database driver."""

from pymongo import MongoClient
import pymongo
import json
from bson import BSON
from bson import json_util


class MongoConnection():
    """Wraps a single MongoClient for one database.

    NOTE(review): create ONE instance per process and reuse it.
    MongoClient pools connections internally and never needs an explicit
    close(); opening a new client per request defeats the pool and is the
    likely source of the high CPU/RAM usage described in the question.
    """

    def __init__(self, host="localhost", port=27017, db_name='indexer',
                 conn_type="local", username='', password=''):
        self.host = host
        self.port = port
        # MongoClient replaces the deprecated Connection class; it pools
        # connections automatically (see pymongo docs).
        self.conn = MongoClient(self.host, self.port)
        self.db = self.conn[db_name]
        # Only authenticate when credentials were actually supplied --
        # authenticating with empty strings fails on a no-auth server.
        if username:
            self.db.authenticate(username, password)

    # ------------------------------------------------------------------
    # internal helpers
    # ------------------------------------------------------------------

    def _clean(self, data):
        """Serialize *data* with bson's json_util, rename Mongo-internal
        keys ('$oid' -> 'id', '_id' -> 'uid'), and return plain
        dicts/lists.

        Centralizes the dumps/replace/loads round-trip that was
        previously copy-pasted in every read method.
        """
        json_doc = json.dumps(data, default=json_util.default)
        json_doc = json_doc.replace("$oid", "id")
        json_doc = json_doc.replace("_id", "uid")
        return json.loads(json_doc)

    @staticmethod
    def _cond(conditions):
        """Normalize an optional filter dict (avoids mutable {} defaults)."""
        return conditions if conditions is not None else {}

    # ------------------------------------------------------------------
    # schema helpers
    # ------------------------------------------------------------------

    def ensure_index(self, table_name, index=None):
        """Idempotently create a geospatial (2dsphere) index on *index*."""
        self.db[table_name].ensure_index([(index, pymongo.GEOSPHERE)])

    def create_table(self, table_name, index=None):
        """Create a descending index; implicitly creates the collection."""
        self.db[table_name].create_index([(index, pymongo.DESCENDING)])

    # ------------------------------------------------------------------
    # reads
    # ------------------------------------------------------------------

    def get_one(self, table_name, conditions=None):
        """Return one matching document (None if nothing matches)."""
        single_doc = self.db[table_name].find_one(self._cond(conditions))
        return self._clean(single_doc)

    def get_all(self, table_name, conditions=None, sort_index='_id', limit=100):
        """Return up to *limit* documents, newest-first on *sort_index*."""
        cursor = (self.db[table_name]
                  .find(self._cond(conditions))
                  .sort(sort_index, pymongo.DESCENDING)
                  .limit(limit))
        return self._clean(list(cursor))

    def get_all_vals(self, table_name, conditions=None, sort_index='_id'):
        """Return ALL matching documents sorted descending (no limit)."""
        cursor = (self.db[table_name]
                  .find(self._cond(conditions))
                  .sort(sort_index, pymongo.DESCENDING))
        return self._clean(list(cursor))

    def get_paginated_values(self, table_name, conditions=None,
                             sort_index='_id', pageNumber=1):
        """Return page *pageNumber* (1-based) with a fixed page size of 15."""
        cursor = (self.db[table_name]
                  .find(self._cond(conditions))
                  .sort(sort_index, pymongo.DESCENDING)
                  .skip((pageNumber - 1) * 15)
                  .limit(15))
        return self._clean(list(cursor))

    def get_count(self, table_name, conditions=None, sort_index='_id'):
        """Return the number of documents matching *conditions*."""
        return self.db[table_name].find(self._cond(conditions)).count()

    def get_distinct(self, table_name, distinct_val, query):
        """Return {'count': N, 'results': [...]} of distinct values."""
        values = self.db[table_name].find(query).distinct(distinct_val)
        return {'count': len(values), 'results': values}

    # ------------------------------------------------------------------
    # writes
    # ------------------------------------------------------------------

    def insert_one(self, table_name, value):
        """Insert a single document."""
        self.db[table_name].insert(value)

    def update_push(self, table_name, where, what):
        """$push *what* onto matching documents (no upsert)."""
        self.db[table_name].update(where, {"$push": what}, upsert=False)

    def update(self, table_name, where, what):
        """$set *what* on the first matching document (no upsert)."""
        self.db[table_name].update(where, {"$set": what}, upsert=False)

    def update_multi(self, table_name, where, what):
        """$set *what* on ALL matching documents (no upsert)."""
        self.db[table_name].update(where, {"$set": what}, upsert=False,
                                   multi=True)

    def update_upsert(self, table_name, where, what):
        """$set *what*, inserting a new document if nothing matches."""
        self.db[table_name].update(where, {"$set": what}, upsert=True)

    # ------------------------------------------------------------------
    # aggregation
    # ------------------------------------------------------------------

    def map_reduce(self, table_name, mapper, reducer, query,
                   result_table_name):
        """Run map-reduce, storing output in *result_table_name*."""
        return self.db[table_name].map_reduce(mapper, reducer,
                                              result_table_name, query)

    def map_reduce_search(self, table_name, mapper, reducer, query,
                          sort_by, sort=-1, limit=20):
        """Map-reduce into the 'results' collection, then return the top
        *limit* rows sorted on value.<sort_by> (ascending for distance)."""
        if sort_by == "distance":
            sort_direction = pymongo.ASCENDING
        else:
            sort_direction = pymongo.DESCENDING
        self.db[table_name].map_reduce(mapper, reducer, 'results', query)
        results = (self.db['results']
                   .find()
                   .sort("value." + sort_by, sort_direction)
                   .limit(limit))
        return self._clean(list(results))

    def aggregrate_all(self, table_name, conditions=None):
        """Run an aggregation pipeline and return its 'result' list."""
        all_doc = self.db[table_name].aggregate(
            self._cond(conditions))['result']
        return self._clean(list(all_doc))

    def group(self, table_name, key, condition, initial, reducer):
        """Run a legacy group() command and return the cleaned rows."""
        all_doc = self.db[table_name].group(key=key, condition=condition,
                                            initial=initial, reduce=reducer)
        return self._clean(list(all_doc))
Now, the problem is my MongoDB uses a huge amount of processing power and RAM. Normally it consumes around 80–90% of CPU.
I suspect I am not closing the MongoConnection every time I create an instance of this class. Do I need to close the connection manually in MongoDB?
-
bor, almost 10 years ago: Do I need to close the connection manually, or is it handled by MongoDB itself?
-
user3683180, almost 10 years ago: Connection is deprecated (api.mongodb.org/python/current/api/pymongo/connection.html). MongoClient is the better call to use. It will transparently pool connections for you - api.mongodb.org/python/current/…. You don't need to close connections manually; in fact, you'll probably get worse performance if you do. Automatic connection reuse is a performance enhancer.