Skip to content

Commit 168972d

Browse files
committed
Add copy_table_from_file to Script::Utils
admin/MBImport.pl and admin/replication/ImportReplicationChanges contained very similar implementations of `ImportTable`, so it would be ideal to share them. I'd also like to use the same functionality in a future commit (to load dbmirror2 packets into temporary tables). The implementations in these two files did diverge slightly. For one, MBImport.pl's version allowed fixing broken UTF-8 byte sequences. I'm not sure how necessary that is in 2024, or what the historical reasons for adding it were, but I kept the functionality behind a flag in `copy_table_from_file`. MBImport.pl's version also supported the flags `$delete_first` (to empty the table before importing) and `$fProgress` (to control whether progress is shown). I've basically kept all of MBImport.pl's code, with these features behind `%opts` flags. I kept the definitions of `ImportTable`, but they now call `copy_table_from_file` internally. I couldn't replace all of the `ImportTable` calls with direct calls to `copy_table_from_file`, because `ImportTable` also updates statistics local to each file and has a different return value.
1 parent d07abf5 commit 168972d

File tree

3 files changed

+142
-157
lines changed

3 files changed

+142
-157
lines changed

admin/MBImport.pl

Lines changed: 16 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,17 @@
1010
use Getopt::Long;
1111
use DBDefs;
1212
use Sql;
13-
use MusicBrainz::Script::Utils qw( is_table_empty );
13+
use MusicBrainz::Script::Utils qw(
14+
copy_table_from_file
15+
is_table_empty
16+
);
1417
use MusicBrainz::Server::Replication qw( :replication_type );
1518
use MusicBrainz::Server::Constants qw( @FULL_TABLE_LIST );
1619

1720
use aliased 'MusicBrainz::Server::DatabaseConnectionFactory' => 'Databases';
1821

1922
my ($fHelp, $fIgnoreErrors);
2023
my $tmpdir = '/tmp';
21-
my $fProgress = -t STDOUT;
2224
my $fFixUTF8 = 0;
2325
my $skip_ensure_editor = 0;
2426
my $update_replication_control = 1;
@@ -215,104 +217,25 @@ sub usage
215217

216218
exit($errors ? 1 : 0);
217219

218-
219-
220220
sub ImportTable
221221
{
222222
my ($table, $file) = @_;
223223

224-
print localtime() . " : load $table\n";
225-
226-
my $rows = 0;
227-
228-
my $t1 = [gettimeofday];
229-
my $interval;
230-
231-
my $size = -s($file)
232-
or return 1;
233-
234-
my $p = sub {
235-
my ($pre, $post) = @_;
236-
no integer;
237-
printf $pre.'%-30.30s %9d %3d%% %9d'.$post,
238-
$table, $rows, int(100 * tell(LOAD) / $size),
239-
$rows / ($interval||1);
240-
};
241-
242-
$OUTPUT_AUTOFLUSH = 1;
243-
244-
eval
245-
{
246-
# open in :bytes mode (always keep byte octets), to allow fixing of invalid
247-
# UTF-8 byte sequences in --fix-broken-utf8 mode.
248-
# in default mode, the Pg driver will take care of the UTF-8 transformation
249-
# and croak on any invalid UTF-8 character
250-
open(LOAD, '<:bytes', $file) or die "open $file: $OS_ERROR";
251-
252-
# If you're looking at this code because your import failed, maybe
253-
# with an error like this:
254-
# ERROR: copy: line 1, Missing data for column "automodsaccepted"
255-
# then the chances are it's because the data you're trying to load
256-
# doesn't match the structure of the database you're trying to load it
257-
# into. Please make sure you've got the right copy of the server
258-
# code, as described in the INSTALL file.
259-
260-
$sql->begin;
261-
$sql->do("DELETE FROM $table") if $delete_first;
262-
my $dbh = $sql->dbh; # issues a ping, must be done before COPY
263-
$sql->do("COPY $table FROM stdin");
264-
265-
$p->('', '') if $fProgress;
266-
my $t;
267-
268-
use Encode;
269-
while (<LOAD>)
270-
{
271-
$t = $_;
272-
if ($fFixUTF8) {
273-
# replaces any invalid UTF-8 character with special 0xFFFD codepoint
274-
# and warn on any such occurrence
275-
$t = Encode::decode('UTF-8', $t, Encode::FB_DEFAULT | Encode::WARN_ON_ERR);
276-
} else {
277-
$t = Encode::decode('UTF-8', $t, Encode::FB_CROAK);
278-
}
279-
if (!$dbh->pg_putcopydata($t))
280-
{
281-
print 'ERROR while processing: ', $t;
282-
die;
283-
}
284-
285-
++$rows;
286-
unless ($rows & 0xFFF)
287-
{
288-
$interval = tv_interval($t1);
289-
$p->("\r", '') if $fProgress;
290-
}
291-
}
292-
$dbh->pg_putcopyend() or die;
293-
$interval = tv_interval($t1);
294-
$p->(($fProgress ? "\r" : ''), sprintf(" %.2f sec\n", $interval));
295-
296-
close LOAD
297-
or die $OS_ERROR;
298-
299-
$sql->commit;
300-
301-
die 'Error loading data'
302-
if -f $file and is_table_empty($sql, $table);
224+
my $rows = copy_table_from_file(
225+
$sql, $table, $file,
226+
delete_first => $delete_first,
227+
fix_utf8 => $fFixUTF8,
228+
ignore_errors => $fIgnoreErrors,
229+
);
303230

231+
if ($rows) {
304232
++$tables;
305233
$totalrows += $rows;
306-
307-
1;
308-
};
309-
310-
return 1 unless $EVAL_ERROR;
311-
warn "Error loading $file: $EVAL_ERROR";
312-
$sql->rollback;
313-
314-
++$errors, return 0 if $fIgnoreErrors;
315-
exit 1;
234+
return 1;
235+
} else {
236+
++$errors;
237+
return 0;
238+
}
316239
}
317240

318241
sub ImportAllTables

admin/replication/ImportReplicationChanges

Lines changed: 14 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@ use FindBin;
99
use lib "$FindBin::Bin/../../lib";
1010

1111
use Getopt::Long;
12-
use MusicBrainz::Script::Utils qw( is_table_empty );
12+
use MusicBrainz::Script::Utils qw(
13+
copy_table_from_file
14+
is_table_empty
15+
);
1316
use MusicBrainz::Server::Context;
1417
use DBDefs;
1518
use Sql;
@@ -105,72 +108,19 @@ sub ImportTable
105108
{
106109
my ($table, $file) = @_;
107110

108-
print localtime() . " : load $table\n";
109-
110-
my $rows = 0;
111-
112-
my $t1 = [gettimeofday];
113-
my $interval;
114-
115-
my $size = -s($file) || 1;
116-
117-
my $p = sub {
118-
my ($pre, $post) = @_;
119-
no integer;
120-
printf $pre.'%-30.30s %9d %3d%% %9d'.$post,
121-
$table, $rows, int(100 * tell(LOAD) / $size),
122-
$rows / ($interval||1);
123-
};
124-
125-
$OUTPUT_AUTOFLUSH = 1;
126-
127-
eval
128-
{
129-
open(LOAD, '<:encoding(utf8)', $file) or die "open $file: $OS_ERROR";
130-
131-
$sql->begin;
132-
my $dbh = $sql->dbh; # issues a ping, must be done before COPY
133-
$sql->do("COPY $table FROM stdin");
134-
135-
$p->('', '');
136-
137-
while (<LOAD>)
138-
{
139-
$dbh->pg_putcopydata($_) or die;
140-
141-
++$rows;
142-
unless ($rows & 0xFFF)
143-
{
144-
$interval = tv_interval($t1);
145-
$p->("\r", '');
146-
}
147-
}
148-
149-
$dbh->pg_putcopyend() or die;
150-
151-
$interval = tv_interval($t1);
152-
$p->("\r", sprintf(" %.2f sec\n", $interval));
153-
154-
close LOAD
155-
or die $OS_ERROR;
156-
157-
$sql->commit;
158-
159-
die 'Error loading data'
160-
if -f $file and is_table_empty($sql, $table);
111+
my $rows = copy_table_from_file(
112+
$sql, $table, $file,
113+
ignore_errors => $fIgnoreErrors,
114+
);
161115

116+
if ($rows) {
162117
++$tables;
163118
$totalrows += $rows;
164-
165-
1;
166-
};
167-
168-
return 1 unless $EVAL_ERROR;
169-
warn "Error loading $file: $EVAL_ERROR";
170-
$sql->rollback;
171-
172-
++$errors, return 0 if $fIgnoreErrors;
173-
exit 1;
119+
return 1;
120+
} else {
121+
++$errors;
122+
return 0;
123+
}
174124
}
175125

176126
sub ImportReplicationTables

lib/MusicBrainz/Script/Utils.pm

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,132 @@ package MusicBrainz::Script::Utils;
22
use strict;
33
use warnings;
44

5+
use Encode;
56
use English;
7+
use Time::HiRes qw( gettimeofday tv_interval );
68

79
use feature 'state';
810

911
use base 'Exporter';
1012

1113
our @EXPORT_OK = qw(
14+
copy_table_from_file
1215
get_primary_keys
1316
get_foreign_keys
1417
is_table_empty
1518
log
1619
retry
1720
);
1821

22+
=sub copy_table_from_file

    my $rows = copy_table_from_file($sql, $table, $file, %opts);

Imports C<$file> into C<$table> via PostgreSQL's C<COPY> statement. The
whole import runs inside one transaction started on C<$sql>.

Recognized C<%opts>:

=over 4

=item delete_first

If true, C<DELETE FROM $table> is run (inside the same transaction) before
the C<COPY>.

=item fix_utf8

If true, invalid UTF-8 byte sequences in the file are replaced with the
substitution character and a warning is emitted for each one. Otherwise
an invalid sequence aborts the import.

=item ignore_errors

If true, a failed import returns 0. Otherwise a failed import terminates
the process with C<exit 1>.

=item quiet

If true, the "load" line, the progress display and the final summary line
are all suppressed.

=item show_progress

Whether to redraw a progress line while loading. Defaults to whether
C<STDOUT> is a terminal. Has no effect when C<quiet> is set.

=back

Returns the number of rows imported. Note that 1 is returned, without
touching the database, when C<$file> is empty or does not exist, and 0 is
returned when the import failed and C<ignore_errors> is set.

=cut

sub copy_table_from_file {
    my ($sql, $table, $file, %opts) = @_;

    my $delete_first = $opts{delete_first};
    my $fix_utf8 = $opts{fix_utf8};
    my $ignore_errors = $opts{ignore_errors};
    my $quiet = $opts{quiet};
    my $show_progress = !$quiet && ($opts{show_progress} // (-t STDOUT));

    print localtime() . " : load $table\n"
        unless $quiet;

    my $rows = 0;
    my $t1 = [gettimeofday];
    my $interval;

    # Nothing to load for an empty (or missing) file. Callers treat any
    # true return value as success, hence 1 rather than 0 here.
    my $size = -s($file)
        or return 1;

    # Lexical handle (instead of a package-global bareword) so that it is
    # closed automatically on every exit path, including errors. Declared
    # here so the progress closure below can query its position.
    my $load_fh;

    # Prints one progress/summary line: table name, rows so far, percentage
    # of the file consumed, and rows per second.
    my $p = sub {
        my ($pre, $post) = @_;
        no integer;
        printf('%s%-30.30s %9d %3d%% %9d%s',
            $pre,
            $table, $rows, int(100 * tell($load_fh) / $size),
            $rows / ($interval || 1),
            $post);
    };

    # Deliberately not localized: this has always been a lasting side
    # effect of the import routines this function replaces.
    $OUTPUT_AUTOFLUSH = 1;

    my $ok = eval {
        # Open in :bytes mode (always keep byte octets) and decode each
        # line explicitly below, so that invalid UTF-8 byte sequences can
        # either be repaired (fix_utf8) or rejected.
        open($load_fh, '<:bytes', $file) or die "open $file: $OS_ERROR";

        # If you're looking at this code because your import failed, maybe
        # with an error like this:
        #   ERROR:  copy: line 1, Missing data for column "automodsaccepted"
        # then the chances are it's because the data you're trying to load
        # doesn't match the structure of the database you're trying to load
        # it into. Please make sure you've got the right copy of the server
        # code, as described in the INSTALL file.

        $sql->begin;
        $sql->do("DELETE FROM $table") if $delete_first;

        my $dbh = $sql->dbh; # issues a ping, must be done before COPY
        $sql->do("COPY $table FROM stdin");

        $p->('', '') if $show_progress;

        # Read into a lexical rather than the global $_, so that a caller
        # iterating with $_ does not have it clobbered.
        while (my $line = <$load_fh>) {
            my $decoded;
            if ($fix_utf8) {
                # Replaces any invalid UTF-8 character with special 0xFFFD
                # codepoint and warn on any such occurrence.
                $decoded = Encode::decode('UTF-8', $line,
                                          Encode::FB_DEFAULT |
                                          Encode::WARN_ON_ERR);
            } else {
                # Dies on the first invalid sequence; caught by the
                # enclosing eval.
                $decoded = Encode::decode('UTF-8', $line, Encode::FB_CROAK);
            }

            if (!$dbh->pg_putcopydata($decoded)) {
                print 'ERROR while processing: ', $decoded;
                die 'pg_putcopydata failed';
            }

            ++$rows;
            # Refresh the progress display every 4096 rows.
            if (($rows & 0xFFF) == 0) {
                $interval = tv_interval($t1);
                $p->("\r", '') if $show_progress;
            }
        }

        $dbh->pg_putcopyend or die 'pg_putcopyend failed';

        $interval = tv_interval($t1);
        $p->(($show_progress ? "\r" : ''),
             sprintf(" %.2f sec\n", $interval))
            unless $quiet;

        close($load_fh)
            or die "close $file: $OS_ERROR";

        $sql->commit;

        # A non-empty file that left the table empty means the load did
        # not actually take effect.
        die 'Error loading data'
            if -f $file and is_table_empty($sql, $table);

        1;
    };
    # Capture immediately; $EVAL_ERROR is a global that later code may reset.
    my $error = $EVAL_ERROR;

    return $rows if $ok;

    warn "Error loading $file: $error";
    # NOTE(review): this also runs when the failure happened before begin
    # or after commit; confirm that rollback tolerates there being no
    # open transaction.
    $sql->rollback;

    return 0 if $ignore_errors;
    exit 1;
}
130+
19131
=sub get_foreign_keys
20132
21133
Get a list of foreign key columns for (C<$schema>, C<$table>).

0 commit comments

Comments
 (0)