diff --git a/cmd/dump.go b/cmd/dump.go index ec5da90..f40d12d 100644 --- a/cmd/dump.go +++ b/cmd/dump.go @@ -132,6 +132,11 @@ func dumpCmd(passedExecs execs, cmdConfig *cmdConfiguration) (*cobra.Command, er if !v.IsSet("routines") && dumpConfig != nil && dumpConfig.Routines != nil { routines = *dumpConfig.Routines } + ignoreTables := v.GetStringSlice("ignore-tables") + if len(ignoreTables) == 0 { + ignoreTables = nil + } + maxAllowedPacket := v.GetInt("max-allowed-packet") if !v.IsSet("max-allowed-packet") && dumpConfig != nil && dumpConfig.MaxAllowedPacket != nil && *dumpConfig.MaxAllowedPacket != 0 { maxAllowedPacket = *dumpConfig.MaxAllowedPacket @@ -273,6 +278,7 @@ func dumpCmd(passedExecs execs, cmdConfig *cmdConfiguration) (*cobra.Command, er Run: uid, FilenamePattern: filenamePattern, Parallelism: parallel, + IgnoreTables: ignoreTables, } _, err := executor.Dump(tracerCtx, dumpOpts) if err != nil { @@ -366,6 +372,9 @@ S3: If it is a URL of the format s3://bucketname/path then it will connect via S // retention flags.String("retention", "", "Retention period for backups. Optional. If not specified, no pruning will be done. Can be number of backups or time-based. For time-based, the format is: 1d, 1w, 1m, 1y for days, weeks, months, years, respectively. For number-based, the format is: 1c, 2c, 3c, etc. for the count of backups to keep.") + // ignore-tables: tables to exclude from the dump (formats: database.table or table) + flags.StringSlice("ignore-tables", []string{}, "Tables to exclude from the dump. Formats: database.table (e.g. mydb.mytable) or table (applies to all databases/schemas). Can be specified multiple times or as a comma-separated list.") + // encryption options flags.String("encryption", "", fmt.Sprintf("Encryption algorithm to use, none if blank. Supported are: %s. Format must match the specific algorithm.", strings.Join(encrypt.All, ", "))) flags.String("encryption-key", "", "Encryption key to use, base64-encoded. Useful for debugging, not recommended for production. If encryption is enabled, and both are provided or neither is provided, returns an error.") diff --git a/cmd/dump_test.go b/cmd/dump_test.go index 3b11ca8..f831d0c 100644 --- a/cmd/dump_test.go +++ b/cmd/dump_test.go @@ -196,6 +196,70 @@ func TestDumpCmd(t *testing.T) { Routines: true, Parallelism: 1, }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + + // exclude + {"exclude single", []string{"--server", "abc", "--target", "file:///foo/bar", "--exclude", "mydb"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + Exclude: []string{"mydb"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"exclude comma-separated", []string{"--server", "abc", "--target", "file:///foo/bar", "--exclude", "db1,db2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + Exclude: []string{"db1", "db2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"exclude multiple flags", []string{"--server", "abc", "--target", "file:///foo/bar", "--exclude", "db1", "--exclude", "db2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + Exclude: []string{"db1", "db2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + + // ignore-tables + {"ignore-tables single", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "mydb.mytable"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + IgnoreTables: []string{"mydb.mytable"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"ignore-tables comma-separated", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "db1.table1,db2.table2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + IgnoreTables: []string{"db1.table1", "db2.table2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, + {"ignore-tables multiple flags", []string{"--server", "abc", "--target", "file:///foo/bar", "--ignore-tables", "db1.table1", "--ignore-tables", "db2.table2"}, "", false, core.DumpOptions{ + Targets: []storage.Storage{file.New(*fileTargetURL)}, + MaxAllowedPacket: defaultMaxAllowedPacket, + Compressor: &compression.GzipCompressor{}, + DBConn: &database.Connection{Host: "abc", Port: defaultPort}, + FilenamePattern: "db_backup_{{ .now }}.{{ .compression }}", + Routines: true, + Parallelism: 1, + IgnoreTables: []string{"db1.table1", "db2.table2"}, + }, core.TimerOptions{Frequency: defaultFrequency, Begin: defaultBegin}, nil}, } for _, tt := range tests { diff --git a/docs/backup.md b/docs/backup.md index 7b48e7a..0b96804 100644 --- a/docs/backup.md +++ b/docs/backup.md @@ -14,7 +14,7 @@ to a target. That target can be one of: By default, all databases in the database server are backed up, and the system databases named `information_schema`, `performance_schema`, `sys` and `mysql` are excluded. -For example, if you set `DB_DUMP_EXCLUDE=database1 db2` then these two databases will not be dumped. +For example, if you set `DB_DUMP_EXCLUDE=database1,db2` then these two databases will not be dumped. **Dumping just some databases** diff --git a/pkg/core/dump.go b/pkg/core/dump.go index 00347a4..7a11b5d 100644 --- a/pkg/core/dump.go +++ b/pkg/core/dump.go @@ -93,6 +93,8 @@ func (e *Executor) Dump(ctx context.Context, opts DumpOptions) (DumpResults, err return results, fmt.Errorf("failed to list database schemas: %v", err) } } + // filter out excluded databases + dbnames = filterExcludedDatabases(dbnames, opts.Exclude) span.SetAttributes(attribute.StringSlice("actual-schemas", dbnames)) for _, s := range dbnames { outFile := path.Join(workdir, fmt.Sprintf("%s_%s.sql", s, timepart)) @@ -116,6 +118,7 @@ func (e *Executor) Dump(ctx context.Context, opts DumpOptions) (DumpResults, err MaxAllowedPacket: maxAllowedPacket, PostDumpDelay: opts.PostDumpDelay, Parallelism: parallelism, + IgnoreTables: opts.IgnoreTables, }, dw); err != nil { dbDumpSpan.SetStatus(codes.Error, err.Error()) dbDumpSpan.End() @@ -242,3 +245,21 @@ func ProcessFilenamePattern(pattern string, now time.Time, timestamp, ext string } return buf.String(), nil } + +// filterExcludedDatabases removes databases in the exclude list from dbnames. +func filterExcludedDatabases(dbnames, exclude []string) []string { + if len(exclude) == 0 { + return dbnames + } + excludeMap := make(map[string]bool, len(exclude)) + for _, e := range exclude { + excludeMap[e] = true + } + filtered := make([]string, 0, len(dbnames)) + for _, db := range dbnames { + if !excludeMap[db] { + filtered = append(filtered, db) + } + } + return filtered +} diff --git a/pkg/core/dump_test.go b/pkg/core/dump_test.go new file mode 100644 index 0000000..6fc8667 --- /dev/null +++ b/pkg/core/dump_test.go @@ -0,0 +1,71 @@ +package core + +import ( + "testing" +) + +func TestFilterExcludedDatabases(t *testing.T) { + tests := []struct { + name string + dbnames []string + exclude []string + expected []string + }{ + { + name: "no exclusions", + dbnames: []string{"db1", "db2", "db3"}, + exclude: nil, + expected: []string{"db1", "db2", "db3"}, + }, + { + name: "empty exclusions", + dbnames: []string{"db1", "db2", "db3"}, + exclude: []string{}, + expected: []string{"db1", "db2", "db3"}, + }, + { + name: "exclude one", + dbnames: []string{"db1", "db2", "db3"}, + exclude: []string{"db2"}, + expected: []string{"db1", "db3"}, + }, + { + name: "exclude multiple", + dbnames: []string{"db1", "db2", "db3", "db4"}, + exclude: []string{"db2", "db4"}, + expected: []string{"db1", "db3"}, + }, + { + name: "exclude all", + dbnames: []string{"db1", "db2"}, + exclude: []string{"db1", "db2"}, + expected: []string{}, + }, + { + name: "exclude nonexistent", + dbnames: []string{"db1", "db2"}, + exclude: []string{"db99"}, + expected: []string{"db1", "db2"}, + }, + { + name: "exclude with mixed existing and nonexistent", + dbnames: []string{"db1", "db2", "db3"}, + exclude: []string{"db2", "db99"}, + expected: []string{"db1", "db3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := filterExcludedDatabases(tt.dbnames, tt.exclude) + if len(result) != len(tt.expected) { + t.Fatalf("expected %v, got %v", tt.expected, result) + } + for i, v := range result { + if v != tt.expected[i] { + t.Fatalf("expected %v, got %v", tt.expected, result) + } + } + }) + } +} diff --git a/pkg/core/dumpoptions.go b/pkg/core/dumpoptions.go index 408734d..0e65e3c 100644 --- a/pkg/core/dumpoptions.go +++ b/pkg/core/dumpoptions.go @@ -31,5 +31,6 @@ type DumpOptions struct { // PostDumpDelay inafter each dump is complete, while holding connection open. Do not use outside of tests. PostDumpDelay time.Duration // Parallelism how many databases to back up at once, consuming that number of threads - Parallelism int + Parallelism int + IgnoreTables []string } diff --git a/pkg/database/dump.go b/pkg/database/dump.go index 0fab972..379eef5 100644 --- a/pkg/database/dump.go +++ b/pkg/database/dump.go @@ -19,6 +19,7 @@ type DumpOpts struct { // PostDumpDelay after each dump is complete, while holding connection open. Do not use outside of tests. PostDumpDelay time.Duration Parallelism int + IgnoreTables []string } func Dump(ctx context.Context, dbconn *Connection, opts DumpOpts, writers []DumpWriter) error { @@ -63,6 +64,7 @@ func Dump(ctx context.Context, dbconn *Connection, opts DumpOpts, writers []Dump SkipExtendedInsert: opts.SkipExtendedInsert, MaxAllowedPacket: opts.MaxAllowedPacket, PostDumpDelay: opts.PostDumpDelay, + IgnoreTables: opts.IgnoreTables, } // return on any error if err := dumper.Dump(); err != nil { diff --git a/pkg/database/mysql/dump.go b/pkg/database/mysql/dump.go index 02279bc..97315ef 100644 --- a/pkg/database/mysql/dump.go +++ b/pkg/database/mysql/dump.go @@ -400,7 +400,12 @@ func (data *Data) getCharsetCollections() error { func (data *Data) isIgnoredTable(name string) bool { for _, item := range data.IgnoreTables { - if item == name { + if strings.Contains(item, ".") { + parts := strings.SplitN(item, ".", 2) + if parts[0] == data.Schema && parts[1] == name { + return true + } + } else if item == name { return true } } diff --git a/pkg/database/mysql/dump_test.go b/pkg/database/mysql/dump_test.go new file mode 100644 index 0000000..e15de1a --- /dev/null +++ b/pkg/database/mysql/dump_test.go @@ -0,0 +1,38 @@ +package mysql + +import "testing" + +func TestIsIgnoredTable(t *testing.T) { + tests := []struct { + name string + schema string + ignoreTables []string + tableName string + expected bool + }{ + {"exact table name match", "mydb", []string{"mytable"}, "mytable", true}, + {"table name no match", "mydb", []string{"othertable"}, "mytable", false}, + {"qualified match same schema", "backuppc", []string{"backuppc.hosts"}, "hosts", true}, + {"qualified match wrong schema", "otherdb", []string{"backuppc.hosts"}, "hosts", false}, + {"qualified match wrong table", "backuppc", []string{"backuppc.hosts"}, "summary", false}, + {"multiple entries with qualified match", "backuppc", []string{"otherdb.foo", "backuppc.hosts"}, "hosts", true}, + {"multiple entries no match", "backuppc", []string{"otherdb.foo", "otherdb.bar"}, "hosts", false}, + {"mixed qualified and unqualified", "mydb", []string{"backuppc.hosts", "globaltable"}, "globaltable", true}, + {"empty ignore list", "mydb", []string{}, "mytable", false}, + {"nil ignore list", "mydb", nil, "mytable", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + data := &Data{ + Schema: tt.schema, + IgnoreTables: tt.ignoreTables, + } + got := data.isIgnoredTable(tt.tableName) + if got != tt.expected { + t.Errorf("isIgnoredTable(%q) = %v, want %v (schema=%q, ignoreTables=%v)", + tt.tableName, got, tt.expected, tt.schema, tt.ignoreTables) + } + }) + } +}