Info and Help

Contatto Skype: stivlo

My status


Send us a message

Language
English
mysqldump and corruption of utf-8 encoding

I tried to dump a utf-8 encoded database from mysql 4.1.14 to mysql 5.0.16. Despite many efforts and trying --default-character-set option, it was always restoring corrupted data.

I've then decided to write my own code to fix the wrong dump. It is meant to be used on an already restored db, and will fix the wrong encodings. I've used it from the command line as follows.

$ ./dumper.sql > dump.sql

After copying the file to the other machine with scp I've restored as usual.

mysql -u root -p mydbname < dump.sql

This did the trick.

CAUTION: if you want to try this script on your own database, be sure to have a backup first, or even better try it on a copy. This script doesn't do any update query on the database, they are done only when you load the resulting file with mysql.

The function get_table_field() tries to avoid any field that does not need recoding, double check that the list is correct for your database. It's for sure not a complete list.

#!/usr/local/bin/php
<?php

$param
=array("host" => "localhost",
             
"username" => "root"
             
"password" => "****",
             
"db_name"  => "mydbname");

main($param);

function 
main($param) {
    
header("Content-type: text/html; charset=UTF-8");
    
$link1=mysql_connect($param["host"], $param["username"], $param["password"]);
    if (!
$link1) {
        echo 
"can't connect to database\n";
        exit();
    }
    
mysql_select_db($param["db_name"], $link1);
    
$link2=mysql_connect($param["host"], $param["username"], $param["password"]);
    
mysql_select_db($param["db_name"], $link2);
    list(
$pri$table_field) = get_table_field($link1);
    
dump_all($pri$table_field$link1$link2);
    
mysql_close($link1);
    
mysql_close($link2);
}

function 
dump_all($pri$table_field, &$link1, &$link2) {
    for (
$i=0$i<count($table_field); $i++) {
        
$table $table_field[$i][0];
        
$field $table_field[$i][1];
        
$sql="SELECT `".$field."`, ".implode(","$pri[$table])." FROM `"
          
.$table."`";
        
//echo $sql."<br/>";
        
$res1 mysql_query($sql$link1);
        while (
$row=mysql_fetch_assoc($res1)) {
            
$encoding mb_detect_encoding($row[$field]);
            if (
$encoding=="ASCII") continue;
            
$content $row[$field];
            
$content str_replace("à""&agrave;"$content);
            
$content str_replace("è""&egrave;"$content);
            
$content str_replace("é""&eacute;"$content);
            
$content str_replace("ì""&igrave;"$content);
            
$content str_replace("ò""&ograve;"$content);
            
$content str_replace("ù""&ugrave;"$content);
            
//$content = htmlentities($content,  ENT_COMPAT, "UTF-8");
            
$where get_where_cond($pri[$table], $row);
            
$sql "UPDATE `".$table."` SET `".$field."`='"
              
.mysql_real_escape_string($content)."'".$where.";";
            
//$res2 = mysql_query($sql, $link2);
            
if (!$res2) {
                echo 
$sql."\n";
            }
        }
    }
}

function 
get_table_field($link) {
    
$table get_tables($link);
    
$table_field=array();
    
$pri = array();
    for (
$i=0$i<count($table); $i++) {
        
$res mysql_query("DESC ".$table[$i]);
        
$pri[$table[$i]]=array();
        while (
$row mysql_fetch_array($res)) {
            
$field $row["Field"];
            
$type $row["Type"];
            if (
$row["Key"]=="PRI") {
                
$pri[$table[$i]][] = $row["Field"];
            }
            if (
substr($type,0,3)=="int") continue;
            if (
substr($type,0,4)=="date") continue;
            if (
substr($type,0,4)=="enum") continue;
            if (
substr($type,0,4)=="blob") continue;
            if (
substr($type,0,5)=="float") continue;
            if (
substr($type,0,6)=="double") continue;
            if (
substr($type,0,7)=="char(1)") continue;
            if (
substr($type,0,7)=="tinyint") continue;
            if (
substr($type,0,7)=="decimal") continue;
            if (
substr($type,0,8)=="smallint") continue;
            if (
substr($type,0,8)=="longblob") continue;
            if (
substr($type,0,9)=="mediumint") continue;
            if (
substr($type,0,10)=="mediumblob") continue;
            
/**
            if (substr($type,0,4)=="char") continue;
            if (substr($type,0,7)=="varchar") continue;
            if (substr($type,0,4)=="text") continue;
            echo $table[$i]." " .$field." ".$type."<br/>";
            /**/
            
$table_field[] = array($table[$i], $field);
        }
    }
    return array(
$pri$table_field);
}

function 
get_tables($link) {
    
$table=array();
    
$res mysql_query("SHOW TABLES");
    while (
$row mysql_fetch_array($res)) {
        
$table[]=$row[0];
    }
    return 
$table;
}

function 
get_where_cond($pri$row) {
    
$where=" WHERE ";
    
$connector="";
    for (
$i=0$i<count($pri); $i++) {
        
$key=$pri[$i];
        
$value=$row[$key];
        if (!
is_numeric($value)) $value="'".mysql_real_escape_string($value)."'";
        
$where.=$connector."`".$key."`=".$value;
        
$connector=" AND ";
    }
    return 
$where;
}