User:Giorgosg
Jump to navigation
Jump to search
Import isfdb mysql dump in sqlite3
quick script only tested with the 2010-09-04 backup. with any change in the backup layout it will probably stop working.
The resulting db is 140Mb without indexes and 260Mb with. Takes 5 minutes to import with indexes on my laptop.
#!/usr/bin/python
import codecs
import collections
import sqlite3
backup_file = 'backup-2010-09-04'
sqlite_file = 'isf.db'
create_idx = True
tabledata = []
creation = []
drop_lines = ['--', '/*', 'LOCK TABLES', 'UNLOCK TABLES', 'PRIMARY KEY',
'KEY ', 'UNIQUE KEY']
remove_strings = ['character set latin1 collate latin1_bin ', 'unsigned ']
indexes = []
backup_text = codecs.open(backup_file, 'r', 'utf-8').readlines()
for line in backup_text:
if line.startswith('INSERT'):
tabledata.append(line)
else:
if any(line.strip().startswith(dl) for dl in drop_lines):
pass
elif line.find('auto_increment,') != -1:
# assume all autoincrements are primary keys.
creation.append(''.join((' ', line.strip().split(' ')[0],
' INTEGER PRIMARY KEY ASC,\n')))
elif line.startswith(')'):
if creation[-1].strip().endswith(','):
creation[-1] = ' ' + creation[-1].strip()[:-1] + '\n'
creation.append(');\n')
elif line.find('enum(') != -1:
creation.append(''.join((' ', line.strip().split(' ')[0],
' varchar(15) default NULL,\n')))
else:
for r in remove_strings:
line = ''.join(line.split(r))
creation.append(line)
if any(line.strip().startswith(k) for k in drop_lines[-2:]):
ti = 1
while not creation[-ti].startswith('CREATE'):
ti += 1
tbl = creation[-ti].split('`')[1]
col = line.split('(')[1].split('`')[1]
indexes.append((tbl, col))
#print indexes
#file('dbtmp', 'w').write(''.join(creation))
conn = sqlite3.connect(sqlite_file)
# create the tables
conn.executescript(''.join(creation))
for chunk in tabledata:
tablenm = chunk[chunk.find('`'):50].split('`')[1]
values = ''.join(('(', chunk[chunk.find('('):-3], ',)'))
values = eval(values, {'NULL': None})
try:
values = [[codecs.utf_8_decode(value)[0] if type(value)==str else value
for value in v] for v in values]
except:
print values
print 'adding into', tablenm
qs = u'insert into %s values (%s)' % (tablenm, (u"?, " * len(values[0]))[:-2])
for v in values:
conn.execute(qs, v)
conn.commit()
if create_idx:
print 'creating indexes.'
for idx in indexes:
conn.execute('create index %s on %s(%s)' %
('_'.join(idx), idx[0], idx[1]))
conn.commit()
conn.close()