#!/usr/bin/env php
<?php
/**
 * Extract base64 encoded images from an HTML file.
 * Saves the images as files and replaces the base64 images with
 * the file paths.
 *
 * Useful for LibreOffice/OpenOffice HTML exports.
 *
 * Images are written to "<file>-images/" next to the input file and are
 * referenced in the rewritten HTML relative to that file's directory.
 * The rewritten HTML is printed on STDOUT; all diagnostics go to STDERR
 * so that they never end up inside the redirected output document.
 *
 * Usage:
 * $ ./htmlbase64extractor.php file.html > fixed.html
 *
 * Licensed in the public domain
 *
 * @author Christian Weiske <weiske@mogic.com>
 */
if ($argc < 2) {
    fwrite(STDERR, "File missing\n");
    exit(1);
}

// MIME type => file extension for the image types that can be named directly.
$map = array(
    'image/jpg'     => 'jpg',
    'image/jpeg'    => 'jpg',
    'image/png'     => 'png',
    'image/gif'     => 'gif',
    'image/webp'    => 'webp',
    'image/bmp'     => 'bmp',
    'image/svg+xml' => 'svg',
);

$file      = $argv[1];
$imgdir    = $file . '-images/';
// basename() drops the trailing slash, so it is re-added for the HTML path.
$relimgdir = basename($imgdir) . '/';

// Read the input first: a typo in the file name must not leave an empty
// image directory behind, and must not print a PHP warning into STDOUT.
if (!is_file($file) || !is_readable($file)) {
    fwrite(STDERR, "Cannot read file: $file\n");
    exit(1);
}
$content = file_get_contents($file);
if ($content === false) {
    fwrite(STDERR, "Cannot read file: $file\n");
    exit(1);
}

// The second is_dir() covers the case where the directory appeared
// between the first check and the mkdir() call.
if (!is_dir($imgdir) && !@mkdir($imgdir) && !is_dir($imgdir)) {
    fwrite(STDERR, "Cannot create image directory: $imgdir\n");
    exit(1);
}

$imgcounter = 0;
$content2 = preg_replace_callback(
    '#data:(image/[^;]*);base64,([a-zA-Z0-9+/=]*)#',
    'extractImage',
    $content
);
if ($content2 === null) {
    // preg_replace_callback() returns null when the regex engine fails
    // (e.g. a PCRE limit was hit); printing null would emit an empty file.
    fwrite(STDERR, "Failed to process file: $file\n");
    exit(1);
}
echo $content2;

/**
 * preg_replace_callback() handler: store one embedded image on disk.
 *
 * Reads the globals $imgdir, $relimgdir and $map, and increments
 * $imgcounter for every file name it hands out.
 *
 * @param array $matches Regex match: [0] full data URI, [1] MIME type,
 *                       [2] base64 payload
 *
 * @return string Relative path of the written image file, or the original
 *                data URI unchanged when the image could not be extracted
 *                (invalid base64, unknown image type, write failure)
 */
function extractImage($matches)
{
    global $imgcounter, $imgdir, $relimgdir, $map;

    list($original, $type, $base64) = $matches;

    // Strict mode rejects malformed payloads instead of silently decoding
    // garbage; an empty payload is not an image either.
    $data = base64_decode($base64, true);
    if ($data === false || $data === '') {
        fwrite(STDERR, "Skipping image with invalid base64 data\n");
        return $original;
    }

    // MIME types are case-insensitive.
    $type = strtolower($type);
    if (isset($map[$type])) {
        $extension = $map[$type];
    } else {
        $extension = detectImageExtension($data);
        if ($extension === null && $type == 'image/*') {
            // Historic behaviour: wildcard images are assumed to be PNG.
            $extension = 'png';
        }
        if ($extension === null) {
            fwrite(STDERR, "Skipping image of unsupported type: $type\n");
            return $original;
        }
    }

    $filename = 'image-' . $imgcounter++ . '.' . $extension;
    if (file_put_contents($imgdir . $filename, $data) === false) {
        fwrite(STDERR, "Cannot write image file: $imgdir$filename\n");
        return $original;
    }
    return $relimgdir . $filename;
}

/**
 * Guess the file extension of raw image data from its leading magic bytes.
 *
 * @param string $data Decoded (binary) image data
 *
 * @return string|null "png", "jpg" or "gif"; null if the signature is
 *                     not recognised
 */
function detectImageExtension($data)
{
    if (strncmp($data, "\x89PNG\r\n\x1a\n", 8) === 0) {
        return 'png';
    }
    if (strncmp($data, "\xFF\xD8\xFF", 3) === 0) {
        return 'jpg';
    }
    if (strncmp($data, 'GIF87a', 6) === 0
        || strncmp($data, 'GIF89a', 6) === 0
    ) {
        return 'gif';
    }
    return null;
}