#!/usr/bin/env php
<?php
/**
 * Extract base64 encoded images from an HTML file.
 * Saves the images as files and replaces the base64 images with
 * the file paths.
 *
 * Useful for LibreOffice/OpenOffice HTML exports.
 *
 * Licensed in the public domain
 *
 * @author Christian Weiske <weiske@mogic.com>
 */
// Command line handling: the only (required) argument is the HTML file.
// Diagnostics are written to STDERR because STDOUT carries the rewritten
// HTML and is typically redirected into a file.
if ($argc < 2) {
    fwrite(STDERR, "File missing\n");
    exit(1);
}

// Maps the MIME type found in a data URI to the extension of the
// image file that gets written for it.
$map = array(
    'image/jpg'  => 'jpg',
    'image/jpeg' => 'jpg',
    'image/png'  => 'png',
    'image/gif'  => 'gif',
);

$file = $argv[1];

// Read the input first so that no image directory is left behind when
// the file cannot be read at all.
$content = file_get_contents($file);
if ($content === false) {
    fwrite(STDERR, "Could not read file: " . $file . "\n");
    exit(1);
}

// Images are stored in "<file>-images/" next to the input file; the
// paths written into the HTML are relative to the input file's directory.
$imgdir    = $file . '-images/';
$relimgdir = basename($imgdir) . '/';
// The second is_dir() covers a directory that appeared between the
// first check and the mkdir() call.
if (!is_dir($imgdir) && !mkdir($imgdir) && !is_dir($imgdir)) {
    fwrite(STDERR, "Could not create image directory: " . $imgdir . "\n");
    exit(1);
}

// Running number used by extractImage() to build unique file names.
$imgcounter = 0;
$content2 = preg_replace_callback(
    '#data:(image/[^;]*);base64,([a-zA-Z0-9+/=]*)#',
    'extractImage',
    $content
);

// preg_replace_callback() returns null when PCRE fails (e.g. limits hit
// on very large inputs); echoing that would silently print nothing.
if ($content2 === null) {
    fwrite(
        STDERR,
        "Could not process file (PCRE error code " . preg_last_error() . ")\n"
    );
    exit(1);
}

echo $content2;


/**
 * preg_replace_callback() handler: stores one base64 encoded image as a
 * file and returns the relative path that replaces the data URI.
 *
 * Reads the globals $imgdir (directory to write to), $relimgdir (path
 * prefix used in the returned replacement) and $map (MIME type to file
 * extension); increments the global $imgcounter for every stored image.
 *
 * The data URI is returned unchanged - and nothing is written - when the
 * payload is empty or not valid base64, when the image type is neither
 * listed in $map nor recognizable from the image data, or when the file
 * cannot be written.
 *
 * @param array $matches Regex match: 0 = full data URI, 1 = MIME type,
 *                       2 = base64 encoded image data
 *
 * @return string Relative path of the written image file, or the
 *                original data URI if the image was not extracted
 */
function extractImage($matches)
{
    global $imgcounter, $imgdir, $relimgdir, $map;

    list($original, $type, $base64) = $matches;

    // Strict mode: reject broken payloads instead of writing garbage.
    $data = base64_decode($base64, true);
    if ($data === false || $data === '') {
        return $original;
    }

    if (isset($map[$type])) {
        $extension = $map[$type];
    } elseif (strncmp($data, "\x89PNG\r\n\x1a\n", 8) === 0) {
        // Type is unknown or the "image/*" wildcard: fall back to the
        // magic bytes at the start of the image data.
        $extension = 'png';
    } elseif (strncmp($data, "\xff\xd8\xff", 3) === 0) {
        $extension = 'jpg';
    } elseif (strncmp($data, 'GIF87a', 6) === 0
        || strncmp($data, 'GIF89a', 6) === 0
    ) {
        $extension = 'gif';
    } elseif ($type === 'image/*') {
        // Historic behavior: undetectable wildcard images are stored as PNG.
        $extension = 'png';
    } else {
        // No sensible file extension available; keep the image inline.
        return $original;
    }

    $filename = 'image-' . $imgcounter . '.' . $extension;
    if (file_put_contents($imgdir . $filename, $data) === false) {
        return $original;
    }

    // Only consume a number once the file really exists.
    $imgcounter++;
    return $relimgdir . $filename;
}
?>
