#!/usr/bin/env php
<?php
/**
 * Add an ID to each content tag in the given HTML file so
 * that it's possible to link to each paragraph.
 *
 * The rules are:
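 * - content tags get an ID (p, li, dd, dt and the other tags listed in $idTags)
 * - IDs already present on headings are used as prefix for the following
 *   content tags
 * - if one content tag (li) contains a content tag (p), the parent content
 *   tag (li) gets its id removed (only when $bRemoveContainerIdsOnContent
 *   is enabled; it is disabled by default)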
 * - ids get prefixed
 *
 * @author Christian Weiske <cweiske@cweiske.de>
 */
// Switch read by addIds(): when true, a container tag loses its id as soon
// as content tags were counted somewhere below it. Disabled here.
$bRemoveContainerIdsOnContent = false;

// Command line validation. Each failure prints a message and ends the
// script with its own exit code (1: no argument, 2: missing file,
// 3: wrong extension).
$file = ($argc >= 2) ? $argv[1] : null;
if ($file === null) {
    echo "Please pass a file\n";
    exit(1);
}
if (file_exists($file) === false) {
    echo "File does not exist: $file\n";
    exit(2);
}
if ('.htm' !== substr($file, -4)) {
    echo "add-ids works only on htm files\n";
    exit(3);
}

require_once 'autoload.php';

// Parse the file as XML; simplexml_load_file() yields false when it
// cannot parse the document (exit code 4).
$sx = simplexml_load_file($file);
if (!($sx instanceof SimpleXMLElement)) {
    echo "Failed to load XML\n";
    exit(4);
}

//tags whose ID is used as prefix for the next
$refTags = array_flip(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'));
//tag name => short code that is put into generated IDs
//(prefix + short code + running number)
$idTags = array(
    'blockquote' => 'q',
    'dd' => 'd',
    'dl' => 'l',
    'dt' => 't',
    'h1' => 'h',
    'h2' => 'h',
    'h3' => 'h',
    'h4' => 'h',
    'h5' => 'h',
    'h6' => 'h',
    'li' => 'i',
    'ol' => 'l',
    'p'  => 'p',
    'pre' => 'c',
    'ul' => 'l',
);
//tags whose children are processed recursively by addIds()
$containerTags = array_flip(
    array(
        'blockquote', 'div',
        'dl', 'ol', 'ul', 'li'
    )
);
//prefix for generated IDs; replaced whenever a heading with an ID is seen
$idPrefix    = '';
//short code => last running number handed out for it
$counter     = array();
//set of IDs that are already taken (ID => true)
$existingIds = array();

//Register every id that is already present anywhere in the document before
//any ID is generated. addIds() only learns about an existing id when it
//reaches the tag carrying it, and it never looks at tags outside of
//$idTags/$containerTags, so without this step a generated ID could duplicate
//a hand-written one that appears later in the file or on another tag.
$idAttributes = $sx->xpath('//@id');
if (is_array($idAttributes)) {
    foreach ($idAttributes as $idAttribute) {
        $existingIds[(string) $idAttribute] = true;
    }
}

addIds($sx->body->children());

/**
 * Give every ID tag in a list of sibling elements an id attribute and
 * descend into container tags.
 *
 * Works on the global configuration ($idTags, $containerTags, $refTags)
 * and updates the global state ($idPrefix, $counter, $existingIds):
 * - a tag that already has an id keeps it; the id is recorded as taken.
 *   If the tag is a reference tag, its id plus "-" becomes the new prefix
 *   and all counters except the heading counter "h" restart at 0.
 * - a tag without id that is listed in $idTags gets
 *   prefix + short code + next free running number.
 * - children of container tags are handled recursively; when
 *   $bRemoveContainerIdsOnContent is set and content was counted below
 *   the container, the container's id attribute is removed again.
 *
 * @param SimpleXMLElement $tags Sibling elements to process
 *
 * @return integer Number of non-container ID tags found in $tags,
 *                 including those found below container tags
 */
function addIds($tags) {
    global $containerTags, $idTags, $refTags,
        $idPrefix, $counter, $existingIds, $bRemoveContainerIdsOnContent;

    $total = 0;
    foreach ($tags as $element) {
        $name        = $element->getName();
        $isIdTag     = isset($idTags[$name]);
        $isContainer = isset($containerTags[$name]);
        if (!$isIdTag && !$isContainer) {
            //neither numbered nor descended into
            continue;
        }

        if (isset($element['id'])) {
            $currentId = (string) $element['id'];
            $existingIds[$currentId] = true;
            if (isset($refTags[$name])) {
                $idPrefix = $currentId . '-';
                //restart numbering below the new prefix,
                //but let the heading counter run on
                foreach (array_keys($counter) as $code) {
                    if ($code === 'h') {
                        continue;
                    }
                    $counter[$code] = 0;
                }
            }
        } elseif ($isIdTag) {
            $code = $idTags[$name];
            if (!isset($counter[$code])) {
                $counter[$code] = 0;
            }

            //advance the running number until the ID is not taken yet
            do {
                $counter[$code]++;
                $candidate = $idPrefix . $code . $counter[$code];
            } while (isset($existingIds[$candidate]));

            $element['id'] = $candidate;
            $existingIds[$candidate] = true;
        }

        if (!$isContainer) {
            //passed the filter above without being a container,
            //so this is an ID tag: it counts as content
            $total++;
            continue;
        }

        $nested = addIds($element->children());
        $total += $nested;
        if ($bRemoveContainerIdsOnContent && $nested > 0) {
            unset($element['id']);
        }
    }

    return $total;
}

//Serialise the modified document and write it back over the input file.
$newXml = $sx->asXML();
//Refuse to overwrite the file when serialisation failed or when the result
//is shorter than the file currently on disk.
if ($newXml === false || strlen($newXml) < filesize($file)) {
    echo "Something went wrong when adding IDs to $file\n";
    exit(10);
}
//file_put_contents() returns false on failure; report that instead of
//ending with exit code 0 although nothing was written.
if (file_put_contents($file, $newXml) === false) {
    echo "Failed to write $file\n";
    exit(11);
}
?>
