So I have a large collection and I want to count the number of documents that all have the same company_id
with other filters(either expired_at
is greater than new Date()
or expired_at
doesn't exist in the document). Basically to count the number of active jobs belong to a company.
Here is what I have so far but the count result is way bigger than it should be. Can anybody tell me what is wrong with the query?
Maybe there are duplicates? If thats the case, how to group duplicates as one or avoid them when counting? Thanks.
db.jobs.aggregate([
{
$match: {
$and: [
{company_id: ObjectId('524a09a44c9ff23382000037')},
{ $or: [ { expired_at: { $gt: new Date() } }, { expired_at: { $exists: false } } ]}
]
}
},
{
$group: {
_id: 1,
count: {$sum: 1}
}
}
])
Actual code:
query := bson.M{
"$and": []bson.M{
bson.M{"company_id": id},
bson.M{
"$or": []bson.M{
bson.M{"expired_at": bson.M{"$exists": false}},
bson.M{"expired_at": bson.M{"$gt": bson.Now()}},
},
},
},
}
counter := []bson.M{
bson.M{"$match": query},
bson.M{"$group": bson.M{"_id": 1, "count": bson.M{"$sum": 1}}},
}
var result struct {
Count int `bson:"count"`
}
err := s.DB(session).C("jobs").Pipe(counter).One(&result)
This is what the schema looks like:
{
"_id" : ObjectId("52683eceda9f660e1e000011"),
"activated_at" : ISODate("2014-05-30T09:18:40.961Z"),
"url" : "http://blahblah/jobid3939799-public-sector-oracle-federal-financials-senior-associate-jobs",
"job_category_ids" : [
"Accounting/Auditing",
"Finance",
"Information Technology",
"Consulting"
],
"location" : {
"full_address" : "McLean, VA",
"pts" : [
-77.1772604,
38.9338676
]
},
"created_at" : ISODate("2013-10-23T21:25:34.262Z"),
"ref_id" : "42927BR-0",
"company_id" : ObjectId("524a09a44c9ff23382000037"),
"updated_at" : ISODate("2014-05-30T09:18:41.085Z"),
"expired_at" : ISODate("2014-05-31T09:21:30.357Z")
}