Wikia

WoWWiki

ArmoryProfileBot/Source

Talk0
101,736 pages on
this wiki

< User:ArmoryProfileBot

This bot was written in PHP and uses the HTTP extension. It currently runs from a DreamHost shell account using PHP 5.2.3 (cli).

<?php

// No promises on when these TODOs will get handled.  Don't hold your breath.

// APB 2.0 features:
// * HttpRequestPool?
// * subst:-chained templates (significantly reduce display-time load)
// * round-the-clock updates with burst-resistance queueing?
// * per-character templates includable from any page
//
// APB 2.x features:
// * find characters using API "embeddedin" and "templates" (abolish U:APB/Users, U:*/APB Config, and U:*/APB Table)
// * stats tailored to build (i.e. no Str for mages, +heal for resto druids, etc.)

// ----------------------------------------------------------------------------
// Reference data
// ----------------------------------------------------------------------------

// Base URLs: one Armory host per supported region code, plus the wiki's script entry point.
$url_armory = array(
        'US' => 'http://www.wowarmory.com/',
        'EU' => 'http://eu.wowarmory.com/',
);
$url_wiki = 'http://www.wowwiki.com/index.php';

// Codes carried by APBException so that the main loop knows how far to unwind.
define('APB_EX_FATAL', 1); // Abort the whole program.
define('APB_EX_SKIP', 2);  // Skip the current item.

// Armory XML file name => (profile field name => XPath of that field inside the file).
// The main loop foreach()es over this table, so a new data field only has to be added here.
$files = array(
        'character-sheet' => array(
                // Identity
                'name' => '//character/@name',
                'realm' => '//character/@realm',
                'race' => '//character/@race',
                'gender' => '//character/@gender',
                'class' => '//character/@class',
                'guild' => '//character/@guildName',
                'level' => '//character/@level',
                // Talent points per tree
                'talent1' => '//talentSpec/@treeOne',
                'talent2' => '//talentSpec/@treeTwo',
                'talent3' => '//talentSpec/@treeThree',
                // Primary professions
                'prof1' => '//professions/skill[1]/@name',
                'prof1val' => '//professions/skill[1]/@value',
                'prof2' => '//professions/skill[2]/@name',
                'prof2val' => '//professions/skill[2]/@value',
                // Resource bars
                'health' => '//health/@effective',
                'mana' => '//secondBar[@type="m"]/@effective',
                // Base stats: effective value followed by the unbuffed base value
                'str' => '//strength/@effective',
                'strbase' => '//strength/@base',
                'agi' => '//agility/@effective',
                'agibase' => '//agility/@base',
                'sta' => '//stamina/@effective',
                'stabase' => '//stamina/@base',
                'int' => '//intellect/@effective',
                'intbase' => '//intellect/@base',
                'spi' => '//spirit/@effective',
                'spibase' => '//spirit/@base',
                'arm' => '//armor/@effective',
                'armbase' => '//armor/@base',
        ),
        'character-skills' => array(
                'cooking' => '//skill[@key="cooking"]/@value',
                'firstaid' => '//skill[@key="firstaid"]/@value',
                'fishing' => '//skill[@key="fishing"]/@value',
                'riding' => '//skill[@key="riding"]/@value',
        ),
);

// ----------------------------------------------------------------------------
// Classes
// ----------------------------------------------------------------------------

/**
 * Marker exception type for errors raised by this bot's own code.
 *
 * It adds nothing to Exception; catching it separately lets the main loop
 * distinguish the bot's own failures (which carry an APB_EX_* code) from
 * unexpected exceptions thrown by anything else.
 */
class APBException extends Exception
{
}

// ----------------------------------------------------------------------------
// Functions
// ----------------------------------------------------------------------------

/**
 * Download one Armory XML document and return its body.
 *
 * @param string $region Key into the global $url_armory table (e.g. "US").
 * @param string $file   Armory file name without the ".xml" suffix.
 * @param string $query  Query string, appended verbatim after "?".
 * @return string Body of the HTTP response.
 * @throws APBException With code APB_EX_SKIP on a transport error, a non-200
 *                      response code, or an empty response.
 */
function armory_get($region, $file, $query) {
        global $url_armory;

        // Build the request once so that the retry below repeats exactly the same call.
        $target = $url_armory[$region] . "$file.xml?$query";
        $options = array(
                'redirect' => 10,
                'useragent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2) Gecko/20070219 Firefox/2.0.0.2',
        );

        $response = http_get($target, $options, $info);

        // A DNS lookup timeout earns exactly one more attempt.
        if (isset($info) && !empty($info['error']) && $info['error'] == "name lookup timed out") {
                $response = http_get($target, $options, $info);
        }

        if (isset($info)) {
                if (!empty($info['error'])) {
                        throw new APBException("Failed to retrieve profile, error \"$info[error]\"", APB_EX_SKIP);
                }
                if (isset($info['response_code']) && $info['response_code'] != 200) {
                        throw new APBException("Failed to retrieve profile, HTTP code $info[response_code]", APB_EX_SKIP);
                }
        }

        if (empty($response)) {
                throw new APBException("Failed to retrieve profile, unknown error", APB_EX_SKIP);
        }

        $parsed = http_parse_message($response);
        return $parsed->body;
}

// Callback for toUTF8(): decodes the single entity held in $match[0] to UTF-8.
// Entities that html_entity_decode() does not recognise come back unchanged.
function apb_decode_entity($match) {
        return html_entity_decode($match[0], ENT_QUOTES, 'UTF-8');
}

// Wiki pages can contain a mix of UTF-8 and HTML entities, and entities may be
// double encoded (e.g. "&amp;ouml;" standing for "&ouml;").  This function
// decodes every well-formed named, decimal or hexadecimal entity and repeats
// until a pass leaves the string unchanged, so both conditions are handled.
//
// The callback form replaces the former "/e" evaluated replacement, which no
// longer exists in PHP 7+.  The loop stops when the text stops changing rather
// than when nothing matches; otherwise an entity that cannot be decoded (e.g.
// "&bogus;") would match on every pass and the loop would never end.
//
// $str:    text as fetched from the wiki.
// returns: the same text with entities replaced by their UTF-8 characters.
function toUTF8($str) {
        do {
                $before = $str;
                $str = preg_replace_callback('/&(?:[0-9A-Za-z]+|#(?:[0-9]+|[xX][0-9A-Fa-f]+));/', 'apb_decode_entity', $before);

                // preg_replace_callback() yields NULL on a PCRE error; keep what we had.
                if ($str === null) return $before;
        } while ($str !== $before);

        return $str;
}

/**
 * Fetch the raw wikitext of a page.
 *
 * @param string $title Page title; it is URL-encoded here before being sent.
 * @return string Body of the HTTP response.
 * @throws APBException With code APB_EX_SKIP on a transport error, a non-200
 *                      response code, an empty response, or a body that
 *                      contains an "<html" tag.
 */
function wiki_get($title) {
        global $url_wiki;

        // The "raw" action appears to be similar to Special:Export, but without the data we don't care about.
        $target = "$url_wiki?action=raw&title=" . urlencode($title);

        $response = http_get($target, array(), $info);

        // A DNS lookup timeout earns exactly one more attempt.
        if (isset($info) && !empty($info['error']) && $info['error'] == "name lookup timed out") {
                $response = http_get($target, array(), $info);
        }

        if (isset($info)) {
                if (!empty($info['error'])) {
                        throw new APBException("Failed to retrieve wiki page, error \"$info[error]\"", APB_EX_SKIP);
                }
                if (isset($info['response_code']) && $info['response_code'] != 200) {
                        throw new APBException("Failed to retrieve wiki page, HTTP code $info[response_code]", APB_EX_SKIP);
                }
        }

        if (empty($response)) {
                throw new APBException("Failed to retrieve wiki page, unknown error", APB_EX_SKIP);
        }

        $parsed = http_parse_message($response);
        $wikitext = $parsed->body;

        // Raw wikitext is not expected to carry a document tag; an HTML page here is treated as "no such page".
        if (preg_match('/<html/i', $wikitext)) {
                throw new APBException("Page contained document tag; probably a non-existent page.", APB_EX_SKIP);
        }

        return $wikitext;
}

// Saves $text as the new content of wiki page $title.
//
// A single HttpRequest object is kept in a static variable across calls so
// that the login cookies obtained on the first call are reused for every
// later edit.  The object is only stored once the login has succeeded, so a
// failed login is never mistaken for an authenticated session on a later call.
//
// $title:   page title.  It is interpolated as-is into the edit-form URL; the
//           caller in this file passes a title that is already URL-encoded.
// $text:    full new wikitext of the page.
// $comment: edit summary.
// $minor:   when true, the edit is flagged as minor.
//
// Reads the globals $post (actually submit the edit?), $url_wiki and $verbose.
// When $post is false nothing is saved; with $verbose set the text that would
// have been posted is echoed instead.
//
// Throws APBException with APB_EX_FATAL if the login fails, and with
// APB_EX_SKIP if the edit form or the save request fails or an edit token
// cannot be found in the form.
function wiki_put($title, $text, $comment = "ArmoryProfileBot character profile update", $minor = false) {
        global $post, $url_wiki, $verbose;

        static $req, $req_fields = array(
                'title' => 'Special:Userlogin',
                'action' => 'submitlogin',
                'wpName' => 'ArmoryProfileBot',
                // Placeholder only.  The apostrophe must be escaped: left bare it ends the
                // string early and the whole file fails to parse.
                'wpPassword' => 'yeah, like I\'d leave it in',
                'wpLoginattempt' => 'Log in',
        ), $edit_tokens = array(
                'wpEditToken',
                'wpEdittime',
                'wpStarttime',
        );

        // Authenticate if this is the first submit of the session
        if (!isset($req)) {
                // HACK: Lighttpd 1.4 (which WoWWiki uses) does not support "continue"
                // requests.  It seems that the only way to prevent HttpRequest from
                // using that header is to force it to make HTTP/1.0 requests.
                $login = new HttpRequest($url_wiki, HTTP_METH_POST, array('protocol' => HTTP_VERSION_1_0));
                $login->enableCookies();
                $login->setPostFields($req_fields);
                $login->send();

                $code = $login->getResponseCode();
                if ($code != 200 && $code != 302) throw new APBException("Login failed, HTTP code $code.", APB_EX_FATAL);

                // The login counts as successful only if the user-name cookie was set.
                $success = false;
                foreach ($login->getResponseCookies() as $obj) foreach ($obj->cookies as $name => $val) if ($name == "wowwikiUserName") {
                        $success = true;
                        break 2; // leave both loops; one hit is enough
                }
                if (!$success) throw new APBException("Login failed, login cookies not set.", APB_EX_FATAL);

                // Keep the authenticated request object for all later calls.
                $req = $login;
        }

        // Get the necessary edit tokens
        // HACK: wpStarttime and wpEdittime are empty for POST requests, so the edit form is fetched with GET.
        $req->setMethod(HTTP_METH_GET);
        $req->setUrl("$url_wiki?action=edit&title=$title");
        $req->send();
        // Restore POST and the plain script URL for the save request further down.
        $req->setMethod(HTTP_METH_POST);
        $req->setUrl($url_wiki);

        $code = $req->getResponseCode();
        if ($code != 200 && $code != 302) throw new APBException("Post failed, HTTP code $code.", APB_EX_SKIP);

        // Save the message body so that the edit tokens can be extracted after resetting the POST fields.
        $body = $req->getResponseBody();

        $submit_fields = array(
                'action' => 'submit',
                'title' => $title,
                'wpSave' => 'Save page',
                'wpSection' => '',
                'wpSummary' => $comment,
                'wpTextbox1' => $text,
        );

        if ($minor) $submit_fields['wpMinoredit'] = 1;

        // Each token is a form input; accept either attribute order (value before name, or name before value).
        foreach ($edit_tokens as $token) {
                if (preg_match('/value\s*=\s*"(?<value>[^"]*)"\s[^<>]*name\s*=\s*"(?<name>' . $token . ')"/', $body, $match)) {
                        $submit_fields[$match['name']] = $match['value'];
                        continue;
                }

                if (preg_match('/name\s*=\s*"(?<name>' . $token . ')"\s[^<>]*value\s*=\s*"(?<value>[^"]*)"/', $body, $match)) {
                        $submit_fields[$match['name']] = $match['value'];
                        continue;
                }

                throw new APBException("Post failed, could not find required edit token \"$token\".", APB_EX_SKIP);
        }

        if ($post) {
                $req->setPostFields($submit_fields);
                $req->send();

                $code = $req->getResponseCode();
                if ($code != 200 && $code != 302) throw new APBException("Post failed, HTTP code $code.", APB_EX_SKIP);
        } else if ($verbose) {
                echo "\nTest run.  Would have posted to \"$title\":\n\n$text\n";
        }
}

// Turns the wikitext of a list page into an array of its entries.
//
// Both page types read by this bot (the user list and the per-user character
// lists) share one format: one entry per line, optionally preceded by
// non-word characters (e.g. list bullets) for display purposes.  Lines are
// dropped when they are empty or whitespace-only, or when they contain any of
// "=" (headings), "{" (templates), "<" or "[".
//
// $text:   raw wikitext of the page.
// returns: array of entries with the leading decoration and surrounding
//          whitespace removed, in page order.
function wiki_list($text) {
        // Treat "\r" like "\n"; the empty lines this creates are discarded below.
        $lines = explode("\n", str_replace("\r", "\n", $text));

        $items = array();
        foreach ($lines as $line) {
                // Any of these characters marks the line as markup rather than an entry.
                foreach (array('=', '{', '<', '[') as $marker) {
                        if (strpos($line, $marker) !== false) continue 2;
                }

                if (preg_match('/^\s*$/', $line)) continue;

                // Strip leading runs of non-word characters (bullets, numbering marks).
                $entry = preg_replace('/^\s*(?:\W+\s*)*(.+)\s*$/', '$1', trim($line));
                $items[] = $entry;
        }

        return $items;
}

// ----------------------------------------------------------------------------
// "main()"
// ----------------------------------------------------------------------------

// Overall flow:
//   1. Read the command line (flags and, optionally, explicit user names).
//   2. If no users were named, fetch the user list from the wiki.
//   3. For each user: read their character list, fetch every character's
//      Armory data, and build one table of row templates.
//   4. Save the table to the user's APB_Table page via wiki_put().

$done_users = array(); // encoded user name => true once processed
$done_chars = array(); // [region][realm][name] => cached character data

// Users named on the command line.  Initialised here so that it is defined on
// every path; it stays empty when the list has to come from the wiki.
$users = array();

// Passing a command-line parameter of "--quiet" prevents status messages and non-fatal errors from being displayed.
// Passing "--verbose" causes status messages to be displayed on stdout.
if (isset($argv)) {
        if (in_array("--quiet", $argv)) {
                error_reporting(E_ERROR | E_USER_ERROR);
                $verbose = false;
        } else if (in_array("--verbose", $argv)) {
                $verbose = true;
        } else {
                $verbose = false;
        }

        if (in_array("--post", $argv)) $post = true;
        else {
                trigger_error("Updates will not be posted to WoWWiki unless this script is run with the \"--post\" option.", E_USER_WARNING);
                $post = false;
        }

        // Every argument that is not an option (does not start with "-") is a user name.
        // A plain loop is used because create_function() and the $arg{0} syntax no longer exist in PHP 8.
        foreach (array_slice($argv, 1) as $arg) {
                if ($arg !== '' && $arg[0] != '-') $users[] = $arg;
        }
} else {
        trigger_error("This script does not appear to have been run from the command line.  Updates will not be posted to WoWWiki unless this script is run from the command-line with the \"--post\" option.", E_USER_WARNING);

        $post = false;
        $verbose = false;
}

if (count($users) == 0) {
        try {
                if ($verbose) echo "Retrieving list of APB users...\n";
                if (!($users = wiki_get("User:ArmoryProfileBot/Users"))) trigger_error("User list is empty?", E_USER_ERROR);
                $users = wiki_list($users);
        } catch (APBException $ex) {
                // All errors are fatal here; no user list, no work.
                trigger_error($ex, E_USER_ERROR);
        } catch (Exception $ex) {
                trigger_error("Unexpected exception retrieving user list: $ex", E_USER_ERROR);
        }
}

foreach ($users as $user) {
        // Spaces become underscores, then the name is URL-encoded with "/" kept literal.
        $user = str_replace(array("%2f", "%2F"), "/", urlencode(str_replace(' ', '_', $user)));

        // Don't process a user more than once.
        if (isset($done_users[$user]) && $done_users[$user]) continue;
        $done_users[$user] = true;

        try {
                if ($verbose) echo "Retrieving list of characters for \"$user\"...\n";
                if (!($names = wiki_get("User:$user/APB_Config"))) {
                        trigger_error("User config empty for \"$user\".", E_USER_WARNING);
                        continue;
                }
        } catch (APBException $ex) {
                if ($ex->getCode() == APB_EX_FATAL) trigger_error($ex, E_USER_ERROR);
                trigger_error($ex, E_USER_WARNING);
                continue;
        } catch (Exception $ex) {
                trigger_error("Unexpected exception getting config for \"$user\": $ex", E_USER_WARNING);
                continue;
        }

        $names = wiki_list($names);
        if (count($names) > 20) {
                trigger_error($msg = "More than 20 rows.  Due to the complexity of the APB templates, putting more than 20 rows on a single page causes WoWWiki to choke.  Only the first 20 rows will be processed.", E_USER_WARNING);
                $table = "<!-- $msg -->\n";

                $skipped = array_slice($names, 20);
                $names = array_slice($names, 0, 20);
        } else {
                $table = "";
                $skipped = array();
        }

        $table .= "{{User:ArmoryProfileBot/Table}}\n";

        // Index-based loop on purpose: when a row fails, the next skipped row is
        // appended to $names to take its place.  foreach iterates over a snapshot
        // of the array and would never visit those appended rows.  $names is a
        // 0-based list here (built by wiki_list() / array_slice()).
        for ($i = 0; $i < count($names); $i++) {
                $rawname = $names[$i];

                // encode before splitting so that it only has to be encoded once
                $name = urlencode(toUTF8($rawname));

                // Expected line format: "<region> <realm...> <name>".  After urlencode() the separators are "+".
                // strpos() reports "not found" as false, never as -1.
                if (false === ($pos = strpos($name, "+"))) {
                        trigger_error($msg = "Incorrectly formatted line \"$rawname\".", E_USER_WARNING);
                        $table .= "<!-- $msg -->\n";
                        if (count($skipped)) array_push($names, array_shift($skipped));
                        continue;
                }

                $region = strtoupper(substr($name, 0, $pos));
                $name = substr($name, $pos + 1);

                // isset() instead of a plain read: an unknown region is not an existing key.
                if (!isset($url_armory[$region])) {
                        trigger_error($msg = "Unrecognized region: \"$region\".", E_USER_WARNING);
                        $table .= "<!-- $msg -->\n";
                        if (count($skipped)) array_push($names, array_shift($skipped));
                        continue;
                }

                // The character name is the last word; everything before it is the realm.
                if (false === ($pos = strrpos($name, "+"))) {
                        trigger_error($msg = "Incorrectly formatted line \"$rawname\".", E_USER_WARNING);
                        $table .= "<!-- $msg -->\n";
                        if (count($skipped)) array_push($names, array_shift($skipped));
                        continue;
                }

                $realm = substr($name, 0, $pos);
                $name = ucfirst(strtolower(substr($name, $pos + 1)));

                if (!isset($done_chars[$region])) $done_chars[$region] = array();
                if (!isset($done_chars[$region][$realm])) $done_chars[$region][$realm] = array();

                if (isset($done_chars[$region][$realm][$name])) {
                        // Already fetched for another user during this run; reuse it.
                        $char = $done_chars[$region][$realm][$name];
                } else {
                        $char = array("region" => $region);

                        foreach ($files as $file => $fields) {
                                try {
                                        if ($verbose) echo "Retrieving profile for \"$region $realm $name\"...\n";
                                        if (!($xml = armory_get($region, $file, "r=$realm&n=$name"))) {
                                                trigger_error($msg = "Could not read profile for character \"$name\" on server \"$realm\" in region \"$region\".", E_USER_WARNING);
                                                $table .= "<!-- $msg -->\n";
                                                if (count($skipped)) array_push($names, array_shift($skipped));
                                                continue 2;
                                        }
                                } catch (APBException $ex) {
                                        if ($ex->getCode() == APB_EX_FATAL) trigger_error($ex, E_USER_ERROR);
                                        trigger_error($ex, E_USER_WARNING);
                                        if (count($skipped)) array_push($names, array_shift($skipped));
                                        continue 2;
                                } catch (Exception $ex) {
                                        // Don't expose unknown exceptions to the Wiki results.
                                        trigger_error(($msg = "Unexpected exception retrieving profile data for character \"$name\" on server \"$realm\" in region \"$region\".") . ": $ex", E_USER_WARNING);
                                        $table .= "<!-- $msg. -->\n";
                                        if (count($skipped)) array_push($names, array_shift($skipped));
                                        continue 2;
                                }

                                if (false !== strpos($xml, "<characterInfo/>")) {
                                        trigger_error($msg = "No character named \"$name\" on server \"$realm\" in region \"$region\" or other Armory error.", E_USER_WARNING);
                                        $table .= "<!-- $msg -->\n";
                                        if (count($skipped)) array_push($names, array_shift($skipped));
                                        continue 2;
                                }

                                try {
                                        $dom = new DOMDocument();
                                        $dom->loadXML($xml);
                                        $xpath = new DOMXPath($dom);
                                } catch (Exception $ex) {
                                        // Don't expose unknown exceptions to the Wiki results.
                                        trigger_error(($msg = "Error parsing profile data for character \"$name\" on server \"$realm\" in region \"$region\".") . ": $ex", E_USER_WARNING);
                                        $table .= "<!-- $msg -->\n";
                                        if (count($skipped)) array_push($names, array_shift($skipped));
                                        continue 2;
                                }

                                // Every configured field is always set, to "" when the XPath matches nothing.
                                foreach ($fields as $key => $path) {
                                        $nodes = $xpath->query($path);
                                        if ($nodes && $nodes->length) $char[$key] = $nodes->item(0)->value;
                                        else $char[$key] = "";
                                }
                        }

                        // The class name is required to even set the row template; without it, give up.
                        if (!$char["class"]) {
                                trigger_error($msg = "Could not determine class for \"$region $realm $name\".", E_USER_WARNING);
                                $table .= "<!-- $msg -->\n";
                                if (count($skipped)) array_push($names, array_shift($skipped));
                                continue;
                        }

                        // "guild" should be the only field which can contain <, >, &, or | (all of which are dangerous)
                        // A literal "|" would be read as a template parameter separator, so it is written as an entity.
                        if ($char["guild"]) $char["guild"] = str_replace("|", "&#124;", htmlspecialchars($char["guild"], ENT_NOQUOTES));

                        // For each stat, blank the base value when it equals the effective value;
                        // otherwise record the difference as the "plus" value.
                        foreach (array("str", "agi", "sta", "int", "spi", "arm") as $key) {
                                $base = $key . "base";
                                $plus = $key . "plus";

                                if ($char[$key] == $char[$base]) {
                                        $char[$base] = "";
                                        $char[$plus] = "";
                                } else if (is_numeric($char[$key]) && is_numeric($char[$base])) {
                                        $char[$plus] = $char[$key] - $char[$base];
                                } else {
                                        // One side is missing; subtracting a non-numeric string is an error on PHP 8.
                                        $char[$plus] = "";
                                }
                        }

                        if (is_numeric($char["talent1"])) {
                                // Tree indexes (0-based) ordered by points spent, highest first.
                                $spec = array($char["talent1"], $char["talent2"], $char["talent3"]);
                                arsort($spec);
                                $spec = array_keys($spec);
                                if ($char["talent" . ($spec[0] + 1)] == 0) {
                                        $char["spec1"] = "None";
                                        $char["spec2"] = "";
                                } else {
                                        // A tree counts as significant at half the points of the biggest tree.
                                        $lim = $char["talent" . ($spec[0] + 1)] / 2;
                                        if ($char["talent" . ($spec[2] + 1)] >= $lim) {
                                                // Even the smallest tree is significant.
                                                $char["spec1"] = "Hybrid";
                                                $char["spec2"] = "";
                                        } else if ($char["talent" . ($spec[1] + 1)] >= $lim) {
                                                $char["spec1"] = $spec[0] + 1;
                                                $char["spec2"] = $spec[1] + 1;
                                        } else {
                                                $char["spec1"] = $spec[0] + 1;
                                                $char["spec2"] = "";
                                        }
                                }

                                $char["talents"] = "$char[talent1]/$char[talent2]/$char[talent3]";
                        } else {
                                $char["spec1"] = "";
                                $char["spec2"] = "";
                                $char["talents"] = "";
                        }

                        $done_chars[$region][$realm][$name] = $char;
                }

                // One row template per character; trailing empty parameters are trimmed off.
                $table .= preg_replace('/\|+}}/', '}}', "{{User:ArmoryProfileBot/Row$char[class]|$char[region]|$char[realm]|$char[name]|$char[gender]|$char[race]|$char[level]|$char[guild]|$char[spec1]|$char[spec2]|$char[talents]|$char[health]|$char[mana]|$char[str]|$char[strbase]|$char[strplus]|$char[agi]|$char[agibase]|$char[agiplus]|$char[sta]|$char[stabase]|$char[staplus]|$char[int]|$char[intbase]|$char[intplus]|$char[spi]|$char[spibase]|$char[spiplus]|$char[arm]|$char[armbase]|$char[armplus]|$char[prof1]|$char[prof1val]|$char[prof2]|$char[prof2val]|$char[cooking]|$char[firstaid]|$char[fishing]|$char[riding]}}\n");
        }

        // Whatever is still in $skipped was never promoted into the table.
        foreach ($skipped as $row) $table .= "<!-- Skipped \"$row\" (too many rows) -->\n";

        try {
                if ($verbose) echo "Posting character table for \"$user\"...\n";
                wiki_put("User:$user/APB_Table", "$table|}");
        } catch (APBException $ex) {
                if ($ex->getCode() == APB_EX_FATAL) trigger_error($ex, E_USER_ERROR);
                trigger_error($ex, E_USER_WARNING);
                continue;
        } catch (Exception $ex) {
                trigger_error("Unexpected exception posting charcter table for \"$user\": $ex", E_USER_WARNING);
                continue;
        }
}

Around Wikia's network

Random Wiki