User:Giorgosg

From ISFDB
Jump to navigation Jump to search

Import isfdb mysql dump in sqlite3

quick script only tested with the 2010-09-04 backup. with any change in the backup layout it will probably stop working.

The resulting db is 140Mb without indexes and 260Mb with. Takes 5 minutes to import with indexes on my laptop.

#!/usr/bin/python
import codecs
import collections
import sqlite3

backup_file = 'backup-2010-09-04'
sqlite_file = 'isf.db'
create_idx = True

tabledata = []
creation = []
drop_lines = ['--', '/*', 'LOCK TABLES', 'UNLOCK TABLES', 'PRIMARY KEY', 
              'KEY ', 'UNIQUE KEY']
remove_strings = ['character set latin1 collate latin1_bin ', 'unsigned ']
indexes = []

backup_text = codecs.open(backup_file, 'r', 'utf-8').readlines()

for line in backup_text:
    if line.startswith('INSERT'):
        tabledata.append(line)
    else:
        if any(line.strip().startswith(dl) for dl in drop_lines):
            pass
        elif line.find('auto_increment,') != -1:
            # assume all autoincrements are primary keys.
            creation.append(''.join(('  ', line.strip().split(' ')[0], 
                                     ' INTEGER PRIMARY KEY ASC,\n')))
        elif line.startswith(')'):
            if creation[-1].strip().endswith(','):
                creation[-1] = '  ' + creation[-1].strip()[:-1] + '\n'
            creation.append(');\n')
        elif line.find('enum(') != -1:
            creation.append(''.join(('  ', line.strip().split(' ')[0], 
                                     ' varchar(15) default NULL,\n')))
        else:
            for r in remove_strings:
                line = ''.join(line.split(r))
            creation.append(line)
        if any(line.strip().startswith(k) for k in drop_lines[-2:]):
            ti = 1
            while not creation[-ti].startswith('CREATE'): 
                ti += 1
            tbl = creation[-ti].split('`')[1]
            col = line.split('(')[1].split('`')[1]
            indexes.append((tbl, col))

#print indexes					
#file('dbtmp', 'w').write(''.join(creation))

conn = sqlite3.connect(sqlite_file)
# create the tables
conn.executescript(''.join(creation))

for chunk in tabledata:
    tablenm = chunk[chunk.find('`'):50].split('`')[1]
    values = ''.join(('(', chunk[chunk.find('('):-3], ',)'))
    values = eval(values, {'NULL': None})
    try:
        values = [[codecs.utf_8_decode(value)[0] if type(value)==str else value 
                   for value in v] for v in values]
    except:
        print values
    print 'adding into', tablenm
    qs = u'insert into %s values (%s)' % (tablenm, (u"?, " * len(values[0]))[:-2])    
    for v in values:
        conn.execute(qs, v)
    conn.commit()

if create_idx:
    print 'creating indexes.'
    for idx in indexes:
        conn.execute('create index %s on %s(%s)' % 
                    ('_'.join(idx), idx[0], idx[1]))

conn.commit()
conn.close()