
@leifwalsh
Created July 27, 2013 17:41
diff --git a/src/mongo/db/security.cpp b/src/mongo/db/security.cpp
index 14408d2..dac1a54 100644
--- a/src/mongo/db/security.cpp
+++ b/src/mongo/db/security.cpp
@@ -24,6 +24,7 @@
#include "mongo/db/client.h"
#include "mongo/db/curop.h"
#include "mongo/db/dbhelpers.h"
+#include "mongo/db/storage/exception.h"
// this is the _mongod only_ implementation of security.h
@@ -85,19 +86,37 @@ namespace mongo {
return;
if ( ! noauth ) {
- Client::GodScope gs;
- Client::ReadContext ctx("admin.system.users");
- Client::Transaction txn(DB_TXN_SNAPSHOT | DB_TXN_READ_ONLY);
- BSONObj result;
- if( Helpers::getSingleton("admin.system.users", result) ) {
- _isLocalHostAndLocalHostIsAuthorizedForAll = false;
+ for (int tries = 0; ; ++tries) {
+ try {
+ Client::GodScope gs;
+ Client::ReadContext ctx("admin.system.users");
+ Client::Transaction txn(DB_TXN_SNAPSHOT | DB_TXN_READ_ONLY);
+ BSONObj result;
+ if( Helpers::getSingleton("admin.system.users", result) ) {
+ _isLocalHostAndLocalHostIsAuthorizedForAll = false;
+ }
+ else if ( ! _warned ) {
+ // you could get a few of these in a race, but that's ok
+ _warned = true;
+ log() << "note: no users configured in admin.system.users, allowing localhost access" << endl;
+ }
+ txn.commit();
+ if (tries > 0) {
+ DEV LOG(0) << "_checkLocalHostSpecialAdmin succeeded after " << tries << " retries" << endl;
+ }
+ break;
+ }
+ catch (storage::RetryableException) {
+ if (tries < 5) {
+ DEV LOG(0) << "_checkLocalHostSpecialAdmin retry #" << (tries + 1) << endl;
+ sleepmillis(1);
+ }
+ else {
+ DEV LOG(0) << "_checkLocalHostSpecialAdmin tried 5 times, giving up..." << endl;
+ throw;
+ }
+ }
}
- else if ( ! _warned ) {
- // you could get a few of these in a race, but that's ok
- _warned = true;
- log() << "note: no users configured in admin.system.users, allowing localhost access" << endl;
- }
- txn.commit();
}
@@ -128,18 +147,33 @@ namespace mongo {
}
else {
string systemUsers = dbname + ".system.users";
- {
- Client::ReadContext tc(systemUsers, dbpath, false);
- Client::Transaction txn(DB_TXN_SNAPSHOT | DB_TXN_READ_ONLY);
+ for (int tries = 0; ; ++tries) {
+ try {
+ Client::ReadContext tc(systemUsers, dbpath, false);
+ Client::Transaction txn(DB_TXN_SNAPSHOT | DB_TXN_READ_ONLY);
- BSONObjBuilder b;
- b << "user" << user;
- BSONObj query = b.done();
- if( !Helpers::findOne(systemUsers.c_str(), query, userObj) ) {
- log() << "auth: couldn't find user " << user << ", " << systemUsers << endl;
- return false;
+ BSONObjBuilder b;
+ b << "user" << user;
+ BSONObj query = b.done();
+ if( !Helpers::findOne(systemUsers.c_str(), query, userObj) ) {
+ log() << "auth: couldn't find user " << user << ", " << systemUsers << endl;
+ return false;
+ }
+ txn.commit();
+ if (tries > 0) {
+ DEV LOG(0) << "getUserObj succeeded after " << tries << " retries" << endl;
+ }
+ break;
+ }
+ catch (storage::RetryableException) {
+ if (tries < 5) {
+ DEV LOG(0) << "getUserObj retry #" << (tries + 1) << endl;
+ }
+ else {
+ DEV LOG(0) << "getUserObj tried " << tries << " times, giving up..." << endl;
+ throw;
+ }
}
- txn.commit();
}
pwd = userObj.getStringField("pwd");
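
Both security.cpp hunks wrap a read-only transaction in the same bounded-retry loop: attempt the transaction, catch storage::RetryableException, optionally back off, and rethrow once the retry budget is spent. A minimal self-contained sketch of that shape follows; the exception type, logging, backoff, and retry cap here are illustrative stand-ins mirroring the patch, not the real TokuMX APIs.

#include <chrono>
#include <iostream>
#include <stdexcept>
#include <thread>

// Stand-in for the storage engine's transient-conflict exception.
namespace storage {
    class RetryableException : public std::runtime_error {
    public:
        RetryableException() : std::runtime_error("retryable storage conflict") {}
    };
}

// Run `op` (open txn, read, commit) and retry transient conflicts.
// Mirrors the loops above: up to 5 retries with a 1ms sleep, then rethrow.
template <typename Op>
void runWithRetries(Op op, const char *what, int maxRetries = 5) {
    for (int tries = 0; ; ++tries) {
        try {
            op();
            if (tries > 0) {
                std::cout << what << " succeeded after " << tries << " retries\n";
            }
            return;  // committed: done
        }
        catch (const storage::RetryableException &) {
            if (tries >= maxRetries) {
                std::cout << what << " giving up after " << tries << " retries\n";
                throw;  // let the caller handle a persistent conflict
            }
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
    }
}
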
diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp
index 6655845..000628c 100644
--- a/src/mongo/s/d_migrate.cpp
+++ b/src/mongo/s/d_migrate.cpp
@@ -49,6 +49,7 @@
#include "mongo/db/namespace_details.h"
#include "mongo/db/ops/insert.h"
#include "mongo/db/ops/update.h"
+#include "mongo/db/storage/exception.h"
#include "mongo/client/connpool.h"
#include "mongo/client/distlock.h"
@@ -1180,83 +1181,108 @@ namespace mongo {
// local one (so to bump version for the entire shard)
try {
- scoped_ptr<ScopedDbConnection> conn(ScopedDbConnection::getInternalScopedDbConnection(shardingState.getConfigServer(), 10.0));
- RemoteTransaction txn(conn->conn(), "serializable");
-
- // Check the precondition
- BSONObjBuilder b;
- b.appendTimestamp("lastmod", maxVersion.toLong());
- BSONObj expect = b.obj();
- Matcher m(expect);
-
- BSONObj found = conn->get()->findOne(ShardNS::chunk, QUERY("ns" << ns).sort("lastmod", -1));
- if (!m.matches(found)) {
- // TODO(leif): Make sure that this means the sharding algorithm is broken and we should bounce the server.
- error() << "moveChunk commit failed: " << ShardChunkVersion::fromBSON(found["lastmod"])
- << " instead of " << maxVersion << migrateLog;
- error() << "TERMINATING" << migrateLog;
- dbexit(EXIT_SHARDING_ERROR);
- }
-
- try {
- // update for the chunk being moved
- BSONObjBuilder n;
- n.append( "_id" , Chunk::genID( ns , min ) );
- myVersion.addToBSON( n, "lastmod" );
- n.append( "ns" , ns );
- n.append( "min" , min );
- n.append( "max" , max );
- n.append( "shard" , toShard.getName() );
- conn->get()->update(ShardNS::chunk, QUERY("_id" << Chunk::genID(ns, min)), n.obj());
- }
- catch (DBException &e) {
- warning() << e << migrateLog;
- error() << "moveChunk error updating the chunk being moved" << migrateLog;
- throw e;
- }
-
- nextVersion = myVersion;
-
- // if we have chunks left on the FROM shard, update the version of one of them as well
- // we can figure that out by grabbing the chunkManager installed on 5.a
- // TODO expose that manager when installing it
-
- ShardChunkManagerPtr chunkManager = shardingState.getShardChunkManager(ns);
- if (chunkManager->getNumChunks() > 0) {
- // get another chunk on that shard
- BSONObj lookupKey;
- BSONObj bumpMin, bumpMax;
- do {
- chunkManager->getNextChunk( lookupKey , &bumpMin , &bumpMax );
- lookupKey = bumpMin;
- }
- while( bumpMin == min );
-
- nextVersion.incMinor(); // same as used on donateChunk
+ for (int tries = 0; ; ++tries) {
+ scoped_ptr<ScopedDbConnection> conn;
try {
- BSONObjBuilder n;
- n.append( "_id" , Chunk::genID( ns , bumpMin ) );
- nextVersion.addToBSON( n, "lastmod" );
- n.append( "ns" , ns );
- n.append( "min" , bumpMin );
- n.append( "max" , bumpMax );
- n.append( "shard" , fromShard.getName() );
- conn->get()->update(ShardNS::chunk, QUERY("_id" << Chunk::genID(ns, bumpMin)), n.obj());
- log() << "moveChunk updating self version to: " << nextVersion << " through "
- << bumpMin << " -> " << bumpMax << " for collection '" << ns << "'" << migrateLog;
+ conn.reset(ScopedDbConnection::getInternalScopedDbConnection(shardingState.getConfigServer(), 10.0));
+ RemoteTransaction txn(conn->conn(), "serializable");
+
+ // Check the precondition
+ BSONObjBuilder b;
+ b.appendTimestamp("lastmod", maxVersion.toLong());
+ BSONObj expect = b.obj();
+ Matcher m(expect);
+
+ BSONObj found = conn->get()->findOne(ShardNS::chunk, QUERY("ns" << ns).sort("lastmod", -1));
+ if (!m.matches(found)) {
+ // TODO(leif): Make sure that this means the sharding algorithm is broken and we should bounce the server.
+ error() << "moveChunk commit failed: " << ShardChunkVersion::fromBSON(found["lastmod"])
+ << " instead of " << maxVersion << migrateLog;
+ error() << "TERMINATING" << migrateLog;
+ dbexit(EXIT_SHARDING_ERROR);
+ }
+
+ try {
+ // update for the chunk being moved
+ BSONObjBuilder n;
+ n.append( "_id" , Chunk::genID( ns , min ) );
+ myVersion.addToBSON( n, "lastmod" );
+ n.append( "ns" , ns );
+ n.append( "min" , min );
+ n.append( "max" , max );
+ n.append( "shard" , toShard.getName() );
+ conn->get()->update(ShardNS::chunk, QUERY("_id" << Chunk::genID(ns, min)), n.obj());
+ }
+ catch (DBException &e) {
+ warning() << e << migrateLog;
+ error() << "moveChunk error updating the chunk being moved" << migrateLog;
+ throw e;
+ }
+
+ nextVersion = myVersion;
+
+ // if we have chunks left on the FROM shard, update the version of one of them as well
+ // we can figure that out by grabbing the chunkManager installed on 5.a
+ // TODO expose that manager when installing it
+
+ ShardChunkManagerPtr chunkManager = shardingState.getShardChunkManager(ns);
+ if (chunkManager->getNumChunks() > 0) {
+ // get another chunk on that shard
+ BSONObj lookupKey;
+ BSONObj bumpMin, bumpMax;
+ do {
+ chunkManager->getNextChunk( lookupKey , &bumpMin , &bumpMax );
+ lookupKey = bumpMin;
+ }
+ while( bumpMin == min );
+
+ nextVersion.incMinor(); // same as used on donateChunk
+ try {
+ BSONObjBuilder n;
+ n.append( "_id" , Chunk::genID( ns , bumpMin ) );
+ nextVersion.addToBSON( n, "lastmod" );
+ n.append( "ns" , ns );
+ n.append( "min" , bumpMin );
+ n.append( "max" , bumpMax );
+ n.append( "shard" , fromShard.getName() );
+ conn->get()->update(ShardNS::chunk, QUERY("_id" << Chunk::genID(ns, bumpMin)), n.obj());
+ log() << "moveChunk updating self version to: " << nextVersion << " through "
+ << bumpMin << " -> " << bumpMax << " for collection '" << ns << "'" << migrateLog;
+ }
+ catch (DBException &e) {
+ warning() << e << migrateLog;
+ error() << "moveChunk error updating chunk on the FROM shard" << migrateLog;
+ throw e;
+ }
+ }
+ else {
+ log() << "moveChunk moved last chunk out for collection '" << ns << "'" << migrateLog;
+ }
+
+ txn.commit();
+ conn->done();
+ if (tries > 0) {
+ DEV LOG(0) << "moveChunk succeeded after " << tries << " retries" << migrateLog;
+ }
+ break;
}
- catch (DBException &e) {
- warning() << e << migrateLog;
- error() << "moveChunk error updating chunk on the FROM shard" << migrateLog;
- throw e;
+ catch (storage::RetryableException) {
+ conn->done();
+ if (tries < 5) {
+ DEV LOG(0) << "moveChunk retry #" << (tries + 1) << migrateLog;
+ }
+ else {
+ DEV LOG(0) << "moveChunk tried " << (tries + 1) << " times, giving up..." << migrateLog;
+ throw;
+ }
}
}
- else {
- log() << "moveChunk moved last chunk out for collection '" << ns << "'" << migrateLog;
- }
-
- txn.commit();
- conn->done();
+ }
+ catch (DBException &e) {
+ LOG(0) << "moveChunk got exception " << e.getCode() << ": " << e.what() << migrateLog;
+ error() << "moveChunk failed to get confirmation of commit" << migrateLog;
+ error() << "TERMINATING" << migrateLog;
+ dbexit(EXIT_SHARDING_ERROR);
}
catch (...) {
// TODO(leif): Vanilla, if it fails, waits 10 seconds and does a query to see if somehow the commit made it through anyway. Maybe we need such a mechanism too?
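
The d_migrate.cpp change applies the same idea to the remote "serializable" transaction against the config server, with one extra obligation: the scoped connection must be released (conn->done()) on both the success path and the retry path before looping or rethrowing, and a conflict that survives all five retries falls out to the new DBException handler, which logs and terminates the shard. A rough sketch of that control flow, using hypothetical stand-in types rather than the real ScopedDbConnection / RemoteTransaction classes:

#include <memory>
#include <stdexcept>

// Stand-in for the storage engine's transient-conflict exception.
namespace storage {
    class RetryableException : public std::runtime_error {
    public:
        RetryableException() : std::runtime_error("retryable storage conflict") {}
    };
}

// Hypothetical stand-ins for the connection and remote transaction.
struct Connection {
    void done() { /* return the connection to the pool */ }
};
struct RemoteTransaction {
    explicit RemoteTransaction(Connection &) { /* begin a serializable txn */ }
    void commit() { /* commit on the remote side */ }
};

void commitChunkMove() {
    for (int tries = 0; ; ++tries) {
        // Fresh connection per attempt, as in the patch's conn.reset(...).
        std::unique_ptr<Connection> conn(new Connection());
        try {
            RemoteTransaction txn(*conn);
            // ... check precondition, update the moved chunk,
            //     bump the version of a remaining chunk ...
            txn.commit();
            conn->done();
            break;                  // committed: leave the retry loop
        }
        catch (const storage::RetryableException &) {
            conn->done();           // always release the connection first
            if (tries >= 5) {
                throw;              // a persistent conflict escapes to the
            }                       // outer handlers, which stop the shard
        }
    }
}
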