User:ArmoryProfileBot/Source (edit this page)
From WoWWiki
This bot was written in PHP and uses the HTTP extension. It currently runs from a DreamHost shell account using PHP 5.2.3 (cli).
<?php
// No promises on when these TODOs will get handled. Don't hold your breath.
// APB 2.0 features:
// * HttpRequestPool?
// * subst:-chained templates (significantly reduce display-time load)
// * round-the-clock updates with burst-resistance queueing?
// * per-character templates includable from any page
//
// APB 2.x features:
// * find characters using API "embeddedin" and "templates" (abolish U:APB/Users, U:*/APB Config, and U:*/APB Table)
// * stats tailored to build (i.e. no Str for mages, +heal for resto druids, etc.)
// ----------------------------------------------------------------------------
// Reference data
// ----------------------------------------------------------------------------
// Armory base URL for each supported region code.
$url_armory = array(
    'US' => "http://www.wowarmory.com/",
    'EU' => "http://eu.wowarmory.com/",
);
// Entry point used for every wiki read and write.
$url_wiki = "http://www.wowwiki.com/index.php";

// Exception codes carried by APBException.
define('APB_EX_FATAL', 1); // Abort the whole program.
define('APB_EX_SKIP', 2); // Skip the current item.

// Armory file name => (profile field => XPath locating it in that file).
// Add new data fields here; both levels of this array are foreach()ed.
$files = array(
    'character-sheet' => array(
        // Identity
        'name' => '//character/@name',
        'realm' => '//character/@realm',
        'race' => '//character/@race',
        'gender' => '//character/@gender',
        'class' => '//character/@class',
        'guild' => '//character/@guildName',
        'level' => '//character/@level',
        // Talent points per tree
        'talent1' => '//talentSpec/@treeOne',
        'talent2' => '//talentSpec/@treeTwo',
        'talent3' => '//talentSpec/@treeThree',
        // Professions
        'prof1' => '//professions/skill[1]/@name',
        'prof1val' => '//professions/skill[1]/@value',
        'prof2' => '//professions/skill[2]/@name',
        'prof2val' => '//professions/skill[2]/@value',
        // Resource bars
        'health' => '//health/@effective',
        'mana' => '//secondBar[@type="m"]/@effective',
        // Attributes, effective and base
        'str' => '//strength/@effective',
        'strbase' => '//strength/@base',
        'agi' => '//agility/@effective',
        'agibase' => '//agility/@base',
        'sta' => '//stamina/@effective',
        'stabase' => '//stamina/@base',
        'int' => '//intellect/@effective',
        'intbase' => '//intellect/@base',
        'spi' => '//spirit/@effective',
        'spibase' => '//spirit/@base',
        'arm' => '//armor/@effective',
        'armbase' => '//armor/@base',
    ),
    'character-skills' => array(
        'cooking' => '//skill[@key="cooking"]/@value',
        'firstaid' => '//skill[@key="firstaid"]/@value',
        'fishing' => '//skill[@key="fishing"]/@value',
        'riding' => '//skill[@key="riding"]/@value',
    ),
);
// ----------------------------------------------------------------------------
// Classes
// ----------------------------------------------------------------------------
/**
 * Marker exception type for errors raised by this bot itself, so that catch
 * blocks can tell them apart from any other Exception. The exception code is
 * one of the APB_EX_* constants.
 */
class APBException extends Exception
{
}
// ----------------------------------------------------------------------------
// Functions
// ----------------------------------------------------------------------------
/**
 * Fetch one Armory XML document and return its response body.
 *
 * @param string $region Key into the global $url_armory table.
 * @param string $file   Armory file name without the ".xml" extension.
 * @param string $query  Already-encoded query string.
 * @return string Body of the HTTP response.
 * @throws APBException With code APB_EX_SKIP on a transport error, a non-200
 *                      response code or an empty response.
 */
function armory_get($region, $file, $query) {
    global $url_armory;

    $target = $url_armory[$region] . "$file.xml?$query";
    // The request identifies itself with a desktop Firefox user agent string.
    $options = array('redirect' => 10,'useragent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2) Gecko/20070219 Firefox/2.0.0.2');

    $msg = http_get($target, $options, $info);

    // Second attempt on DNS errors
    $dns_timeout = isset($info) && !empty($info['error']) && $info['error'] == "name lookup timed out";
    if ($dns_timeout) {
        $msg = http_get($target, $options, $info);
    }

    if (isset($info)) {
        if (!empty($info['error'])) {
            throw new APBException("Failed to retrieve profile, error \"$info[error]\"", APB_EX_SKIP);
        }
        if (isset($info['response_code']) && $info['response_code'] != 200) {
            throw new APBException("Failed to retrieve profile, HTTP code $info[response_code]", APB_EX_SKIP);
        }
    }
    if (empty($msg)) {
        throw new APBException("Failed to retrieve profile, unknown error", APB_EX_SKIP);
    }

    $parsed = http_parse_message($msg);
    return $parsed->body;
}
// Wiki pages can contain a mix of UTF-8 and HTML entities, so the entire
// string can't be passed through a charset conversion; instead each entity is
// decoded on its own and every other byte is left untouched. Entities may
// also be double encoded (e.g. "&amp;ouml;"), so decoding is repeated until
// the string stops changing.

/**
 * preg_replace_callback() helper for toUTF8(): decode a single entity match.
 *
 * @param array $match Match array; element 0 is the complete entity text.
 * @return string The UTF-8 character, or the entity unchanged if unknown.
 */
function toUTF8_decode_entity($match) {
    return html_entity_decode($match[0], ENT_QUOTES, 'UTF-8');
}

/**
 * Decode all (possibly multiply-encoded) HTML entities in $str to UTF-8.
 *
 * Named, decimal and hexadecimal ("&#x..;") entities are handled. Entities
 * that cannot be decoded are left as they are; the loop stops as soon as a
 * pass makes no change, so such entities cannot cause an endless loop.
 *
 * @param string $str Text containing UTF-8 and/or HTML entities.
 * @return string The text with entities replaced by UTF-8 characters.
 */
function toUTF8($str) {
    $pattern = '/&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));/';
    $str = (string) $str;
    do {
        $before = $str;
        $decoded = preg_replace_callback($pattern, 'toUTF8_decode_entity', $before);
        // A PCRE failure yields NULL; keep what has been decoded so far.
        if ($decoded === null) return $before;
        $str = $decoded;
    } while ($str !== $before);
    return $str;
}
/**
 * Fetch the raw wikitext of a page.
 *
 * @param string $title Page title; it is urlencode()d before being sent.
 * @return string The raw page text.
 * @throws APBException With code APB_EX_SKIP on a transport error, a non-200
 *                      response code, an empty response, or a body that
 *                      contains an "<html" tag.
 */
function wiki_get($title) {
    global $url_wiki;

    // The "raw" action appears to be similar to Special:Export, but without the data we don't care about.
    $target = "$url_wiki?action=raw&title=" . urlencode($title);
    $msg = http_get($target, array(), $info);

    // Second attempt on DNS errors
    $dns_timeout = isset($info) && !empty($info['error']) && $info['error'] == "name lookup timed out";
    if ($dns_timeout) {
        $msg = http_get($target, array(), $info);
    }

    if (isset($info)) {
        if (!empty($info['error'])) {
            throw new APBException("Failed to retrieve wiki page, error \"$info[error]\"", APB_EX_SKIP);
        }
        if (isset($info['response_code']) && $info['response_code'] != 200) {
            throw new APBException("Failed to retrieve wiki page, HTTP code $info[response_code]", APB_EX_SKIP);
        }
    }
    if (empty($msg)) {
        throw new APBException("Failed to retrieve wiki page, unknown error", APB_EX_SKIP);
    }

    $body = http_parse_message($msg)->body;
    // A raw wikitext response should not contain an HTML document tag.
    if (preg_match('/<html/i', $body)) {
        throw new APBException("Page contained document tag; probably a non-existent page.", APB_EX_SKIP);
    }
    return $body;
}
/**
 * Save $text as the new content of wiki page $title.
 *
 * This function uses a static HttpRequest object across calls for
 * authentication purposes: the first call logs in, later calls reuse the
 * same cookie-enabled request object. The page is only actually submitted
 * when the global $post is true; otherwise, if the global $verbose is true,
 * the text that would have been posted is echoed.
 *
 * @param string $title   Page title. It is placed into the edit URL as-is.
 *                        NOTE(review): callers appear to pass pre-encoded
 *                        titles -- confirm before adding encoding here.
 * @param string $text    New wikitext for the page.
 * @param string $comment Edit summary.
 * @param bool   $minor   Whether to flag the edit as minor.
 * @return void
 * @throws APBException APB_EX_FATAL when logging in fails; APB_EX_SKIP when
 *                      the edit form cannot be loaded, an edit token is
 *                      missing, or the submit returns an unexpected code.
 */
function wiki_put($title, $text, $comment = "ArmoryProfileBot character profile update", $minor = false) {
    global $post, $url_wiki, $verbose;
    static $req, $req_fields = array(
        'title' => 'Special:Userlogin',
        'action' => 'submitlogin',
        'wpName' => 'ArmoryProfileBot',
        // Placeholder password. The apostrophe has to be escaped; unescaped
        // it ends the string early and the file no longer parses.
        'wpPassword' => 'yeah, like I\'d leave it in',
        'wpLoginattempt' => 'Log in',
    ), $edit_tokens = array(
        'wpEditToken',
        'wpEdittime',
        'wpStarttime',
    );

    // Authenticate if this is the first submit of the session
    if (!isset($req)) {
        // HACK: Lighttpd 1.4 (which WoWWiki uses) does not support "continue"
        // requests. It seems that the only way to prevent HttpRequest from
        // using that header is to force it to make HTTP/1.0 requests.
        $req = new HttpRequest($url_wiki, HTTP_METH_POST, array('protocol' => HTTP_VERSION_1_0));
        $req->enableCookies();
        $req->setPostFields($req_fields);
        $req->send();
        $code = $req->getResponseCode();
        if ($code != 200 && $code != 302) throw new APBException("Login failed, HTTP code $code.", APB_EX_FATAL);

        // The login counts as successful once the user name cookie shows up.
        $success = false;
        foreach ($req->getResponseCookies() as $obj) {
            foreach ($obj->cookies as $name => $val) {
                if ($name == "wowwikiUserName") {
                    $success = true;
                    break 2; // leave both loops, not just the inner one
                }
            }
        }
        if (!$success) throw new APBException("Login failed, login cookies not set.", APB_EX_FATAL);
    }

    // Get the necessary edit tokens
    // HACK: wpStarttime and wpEdittime are empty for POST requests, so the
    // edit form is fetched with GET and the request is then switched back to
    // POST for the submit.
    $req->setMethod(HTTP_METH_GET);
    $req->setUrl("$url_wiki?action=edit&title=$title");
    $req->send();
    $req->setMethod(HTTP_METH_POST);
    $req->setUrl($url_wiki);
    $code = $req->getResponseCode();
    if ($code != 200 && $code != 302) throw new APBException("Post failed, HTTP code $code.", APB_EX_SKIP);

    // Save the message body so that the edit tokens can be extracted after resetting the POST fields.
    $body = $req->getResponseBody();
    $submit_fields = array(
        'action' => 'submit',
        'title' => $title,
        'wpSave' => 'Save page',
        'wpSection' => '',
        'wpSummary' => $comment,
        'wpTextbox1' => $text,
    );
    if ($minor) $submit_fields['wpMinoredit'] = 1;

    // Each token is a form field; accept either attribute order
    // (value before name, or name before value).
    foreach ($edit_tokens as $token) {
        if (preg_match('/value\s*=\s*"(?<value>[^"]*)"\s[^<>]*name\s*=\s*"(?<name>' . $token . ')"/', $body, $match)) {
            $submit_fields[$match['name']] = $match['value'];
            continue;
        }
        if (preg_match('/name\s*=\s*"(?<name>' . $token . ')"\s[^<>]*value\s*=\s*"(?<value>[^"]*)"/', $body, $match)) {
            $submit_fields[$match['name']] = $match['value'];
            continue;
        }
        throw new APBException("Post failed, could not find required edit token \"$token\".", APB_EX_SKIP);
    }

    if ($post) {
        $req->setPostFields($submit_fields);
        $req->send();
        $code = $req->getResponseCode();
        if ($code != 200 && $code != 302) throw new APBException("Post failed, HTTP code $code.", APB_EX_SKIP);
    } else if ($verbose) {
        echo "\nTest run. Would have posted to \"$title\":\n\n$text\n";
    }
}
// Currently, the two types of wiki pages supported by this bot (user list and
// character lists) use the same format, which allows non-word characters (e.g.
// list formatting) in front of each line for display purposes and ignores
// whitespace-only lines and any lines which contain "=" (headings), "{"
// (templates), "<" (tags/comments) or "[" (links).

/**
 * Extract the list items from the raw text of a list page.
 *
 * Leading characters that are not letters, digits or "_" are stripped from
 * each kept line (at least one character is always left). The match is
 * Unicode-aware so that an item starting with a non-ASCII letter keeps that
 * letter instead of losing its bytes as "non-word" characters.
 *
 * @param string $text Raw page text with "\n", "\r" or "\r\n" line endings.
 * @return array List of items in page order.
 */
function wiki_list($text) {
    $items = array();
    foreach (explode("\n", str_replace("\r", "\n", $text)) as $line) {
        // Headings, templates, tags and links are not list items.
        if (false !== strpbrk($line, '={<[')) continue;
        if (preg_match('/^\s*$/', $line)) continue;
        $line = trim($line);
        $item = preg_replace('/^[^\p{L}\p{N}_]+(?=.)/u', '', $line);
        if ($item === null) {
            // Not valid UTF-8: fall back to byte-wise stripping.
            $item = preg_replace('/^\s*(?:\W+\s*)*(.+)\s*$/', '$1', $line);
        }
        $items[] = $item;
    }
    return $items;
}
// ----------------------------------------------------------------------------
// "main()"
// ----------------------------------------------------------------------------
// Bookkeeping: users already handled, and character data already fetched
// (indexed by region, realm and name).
$done_users = array();
$done_chars = array();
// Defaults. $users is always defined, even when the script is not run from
// the command line, so later code can test it safely.
$users = array();
$verbose = false;
$post = false;
// Passing a command-line parameter of "--quiet" prevents status messages and non-fatal errors from being displayed.
// Passing "--verbose" causes status messages to be displayed on stdout.
// "--quiet" takes precedence when both are given.
if (isset($argv)) {
    if (in_array("--quiet", $argv)) {
        error_reporting(E_ERROR | E_USER_ERROR);
    } else if (in_array("--verbose", $argv)) {
        $verbose = true;
    }
    if (in_array("--post", $argv)) {
        $post = true;
    } else {
        trigger_error("Updates will not be posted to WoWWiki unless this script is run with the \"--post\" option.", E_USER_WARNING);
    }
    // Every remaining argument that is not an option is a user name.
    foreach (array_slice($argv, 1) as $arg) {
        if ($arg !== "" && $arg[0] !== "-") $users[] = $arg;
    }
} else {
    trigger_error("This script does not appear to have been run from the command line. Updates will not be posted to WoWWiki unless this script is run from the command-line with the \"--post\" option.", E_USER_WARNING);
}
// No users named on the command line: fall back to the list kept on the
// wiki. empty() is used rather than count() because $users is not set at all
// when the script was not started from the command line.
if (empty($users)) {
    try {
        if ($verbose) echo "Retrieving list of APB users...\n";
        if (!($users = wiki_get("User:ArmoryProfileBot/Users"))) trigger_error("User list is empty?", E_USER_ERROR);
        $users = wiki_list($users);
    } catch (APBException $ex) {
        // All errors are fatal here; no user list, no work.
        trigger_error($ex, E_USER_ERROR);
    } catch (Exception $ex) {
        trigger_error("Unexpected exception retrieving user list: $ex", E_USER_ERROR);
    }
}
// For every user: read the character list from the user's APB_Config page,
// fetch each character's Armory data, and post the resulting table of row
// templates to the user's APB_Table page.
foreach ($users as $user) {
    // NOTE(review): the user name is URL-encoded here and wiki_get()
    // urlencode()s the whole title again -- confirm that names containing
    // characters other than letters, digits, "_", "-" and "." resolve to the
    // intended page.
    $user = str_replace(array("%2f", "%2F"), "/", urlencode(str_replace(' ', '_', $user)));
    // Don't process a user more than once.
    if (isset($done_users[$user]) && $done_users[$user]) continue;
    $done_users[$user] = true;
    try {
        if ($verbose) echo "Retrieving list of characters for \"$user\"...\n";
        if (!($names = wiki_get("User:$user/APB_Config"))) {
            trigger_error("User config empty for \"$user\".", E_USER_WARNING);
            continue;
        }
    } catch (APBException $ex) {
        if ($ex->getCode() == APB_EX_FATAL) trigger_error($ex, E_USER_ERROR);
        trigger_error($ex, E_USER_WARNING);
        continue;
    } catch (Exception $ex) {
        trigger_error("Unexpected exception getting config for \"$user\": $ex", E_USER_WARNING);
        continue;
    }
    $names = wiki_list($names);
    if (count($names) > 20) {
        trigger_error($msg = "More than 20 rows. Due to the complexity of the APB templates, putting more than 20 rows on a single page causes WoWWiki to choke. Only the first 20 rows will be processed.", E_USER_WARNING);
        $table = "<!-- $msg -->\n";
        $skipped = array_slice($names, 20);
        $names = array_slice($names, 0, 20);
    } else {
        $table = "";
        $skipped = array();
    }
    $table .= "{{User:ArmoryProfileBot/Table}}\n";
    // Whenever a row fails, the next skipped row (if any) is appended to
    // $names to take its place. An index loop that re-reads count($names) is
    // used because a by-value foreach never visits elements appended while it
    // is running, which would silently drop those rows.
    for ($row = 0; $row < count($names); $row++) {
        $rawname = $names[$row];
        // encode before splitting so that it only has to be encoded once
        // (spaces become "+", which is then used as the field separator)
        $name = urlencode(toUTF8($rawname));
        // strpos() reports "not found" as false, never as -1.
        $pos = strpos($name, "+");
        if ($pos === false) {
            trigger_error($msg = "Incorrectly formatted line \"$rawname\".", E_USER_WARNING);
            $table .= "<!-- $msg -->\n";
            if (count($skipped)) array_push($names, array_shift($skipped));
            continue;
        }
        $region = strtoupper(substr($name, 0, $pos));
        $name = substr($name, $pos + 1);
        if (!isset($url_armory[$region])) {
            trigger_error($msg = "Unrecognized region: \"$region\".", E_USER_WARNING);
            $table .= "<!-- $msg -->\n";
            if (count($skipped)) array_push($names, array_shift($skipped));
            continue;
        }
        // The last separator splits realm (which may contain spaces) from
        // the character name.
        $pos = strrpos($name, "+");
        if ($pos === false) {
            trigger_error($msg = "Incorrectly formatted line \"$rawname\".", E_USER_WARNING);
            $table .= "<!-- $msg -->\n";
            if (count($skipped)) array_push($names, array_shift($skipped));
            continue;
        }
        $realm = substr($name, 0, $pos);
        $name = ucfirst(strtolower(substr($name, $pos + 1)));
        if (!isset($done_chars[$region])) $done_chars[$region] = array();
        if (!isset($done_chars[$region][$realm])) $done_chars[$region][$realm] = array();
        if (isset($done_chars[$region][$realm][$name])) {
            // Already fetched for another user during this run.
            $char = $done_chars[$region][$realm][$name];
        } else {
            $char = array("region" => $region);
            foreach ($files as $file => $fields) {
                try {
                    if ($verbose) echo "Retrieving profile for \"$region $realm $name\"...\n";
                    if (!($xml = armory_get($region, $file, "r=$realm&n=$name"))) {
                        trigger_error($msg = "Could not read profile for character \"$name\" on server \"$realm\" in region \"$region\".", E_USER_WARNING);
                        $table .= "<!-- $msg -->\n";
                        if (count($skipped)) array_push($names, array_shift($skipped));
                        continue 2;
                    }
                } catch (APBException $ex) {
                    if ($ex->getCode() == APB_EX_FATAL) trigger_error($ex, E_USER_ERROR);
                    trigger_error($ex, E_USER_WARNING);
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                } catch (Exception $ex) {
                    // Don't expose unknown exceptions to the Wiki results.
                    trigger_error(($msg = "Unexpected exception retrieving profile data for character \"$name\" on server \"$realm\" in region \"$region\".") . ": $ex", E_USER_WARNING);
                    $table .= "<!-- $msg. -->\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }
                if (false !== strpos($xml, "<characterInfo/>")) {
                    trigger_error($msg = "No character named \"$name\" on server \"$realm\" in region \"$region\" or other Armory error.", E_USER_WARNING);
                    $table .= "<!-- $msg -->\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }
                try {
                    $dom = new DOMDocument();
                    // loadXML() signals malformed input by returning false
                    // rather than by throwing, so keep its result.
                    $parsed = $dom->loadXML($xml);
                    $xpath = new DOMXPath($dom);
                } catch (Exception $ex) {
                    // Don't expose unknown exceptions to the Wiki results.
                    trigger_error(($msg = "Error parsing profile data for character \"$name\" on server \"$realm\" in region \"$region\".") . ": $ex", E_USER_WARNING);
                    $table .= "<!-- $msg -->\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }
                if (!$parsed) {
                    trigger_error($msg = "Error parsing profile data for character \"$name\" on server \"$realm\" in region \"$region\".", E_USER_WARNING);
                    $table .= "<!-- $msg -->\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }
                // Missing fields become empty strings.
                foreach ($fields as $key => $path) {
                    $nodes = $xpath->query($path);
                    if ($nodes && $nodes->length) $char[$key] = $nodes->item(0)->value;
                    else $char[$key] = "";
                }
            }
            // The class name is required to even set the row template; without it, give up.
            if (!$char["class"]) {
                trigger_error($msg = "Could not determine class for \"$region $realm $name\".", E_USER_WARNING);
                $table .= "<!-- $msg -->\n";
                if (count($skipped)) array_push($names, array_shift($skipped));
                continue;
            }
            // "guild" should be the only field which can contain <, >, &, or | (all of which are dangerous)
            // A literal "|" would split the template parameter, so it is
            // written as a numeric entity.
            if ($char["guild"]) $char["guild"] = str_replace("|", "&#124;", htmlspecialchars($char["guild"], ENT_NOQUOTES, 'UTF-8'));
            // For each stat, report base and bonus only when they differ
            // from the effective value.
            foreach (array("str", "agi", "sta", "int", "spi", "arm") as $key) {
                $base = "{$key}base";
                $plus = "{$key}plus";
                if ($char[$key] == $char[$base]) {
                    $char[$base] = "";
                    $char[$plus] = "";
                } else if (is_numeric($char[$key]) && is_numeric($char[$base])) {
                    $char[$plus] = $char[$key] - $char[$base];
                } else {
                    // One side is missing; there is no meaningful bonus.
                    $char[$plus] = "";
                }
            }
            if (is_numeric($char["talent1"])) {
                // $spec ends up holding the tree indexes (0-2) ordered by
                // points spent, highest first.
                $spec = array($char["talent1"], $char["talent2"], $char["talent3"]);
                arsort($spec);
                $spec = array_keys($spec);
                if ($char["talent" . ($spec[0] + 1)] == 0) {
                    $char["spec1"] = "None";
                    $char["spec2"] = "";
                } else {
                    // A tree counts towards the spec when it holds at least
                    // half as many points as the largest tree.
                    $lim = $char["talent" . ($spec[0] + 1)] / 2;
                    if ($char["talent" . ($spec[2] + 1)] >= $lim) {
                        $char["spec1"] = "Hybrid";
                        $char["spec2"] = "";
                    } else if ($char["talent" . ($spec[1] + 1)] >= $lim) {
                        $char["spec1"] = $spec[0] + 1;
                        $char["spec2"] = $spec[1] + 1;
                    } else {
                        $char["spec1"] = $spec[0] + 1;
                        $char["spec2"] = "";
                    }
                }
                $char["talents"] = "$char[talent1]/$char[talent2]/$char[talent3]";
            } else {
                $char["spec1"] = "";
                $char["spec2"] = "";
                $char["talents"] = "";
            }
            $done_chars[$region][$realm][$name] = $char;
        }
        // Trailing empty parameters are trimmed from the template call.
        $table .= preg_replace('/\|+}}/', '}}', "{{User:ArmoryProfileBot/Row$char[class]|$char[region]|$char[realm]|$char[name]|$char[gender]|$char[race]|$char[level]|$char[guild]|$char[spec1]|$char[spec2]|$char[talents]|$char[health]|$char[mana]|$char[str]|$char[strbase]|$char[strplus]|$char[agi]|$char[agibase]|$char[agiplus]|$char[sta]|$char[stabase]|$char[staplus]|$char[int]|$char[intbase]|$char[intplus]|$char[spi]|$char[spibase]|$char[spiplus]|$char[arm]|$char[armbase]|$char[armplus]|$char[prof1]|$char[prof1val]|$char[prof2]|$char[prof2val]|$char[cooking]|$char[firstaid]|$char[fishing]|$char[riding]}}\n");
    }
    // Whatever is still in $skipped was never attempted.
    foreach ($skipped as $row) $table .= "<!-- Skipped \"$row\" (too many rows) -->\n";
    try {
        if ($verbose) echo "Posting character table for \"$user\"...\n";
        wiki_put("User:$user/APB_Table", "$table|}");
    } catch (APBException $ex) {
        if ($ex->getCode() == APB_EX_FATAL) trigger_error($ex, E_USER_ERROR);
        trigger_error($ex, E_USER_WARNING);
        continue;
    } catch (Exception $ex) {
        trigger_error("Unexpected exception posting charcter table for \"$user\": $ex", E_USER_WARNING);
        continue;
    }
}
