Import Ruty
This commit is contained in:
@@ -0,0 +1,415 @@
|
||||
<?php
|
||||
|
||||
namespace RtfHtmlPhp;
|
||||
|
||||
class Document
|
||||
{
|
||||
/** @var string RTF string being parsed */
protected $rtf;

/** @var int Current position in RTF string */
protected $pos;

/** @var int Length of RTF string */
protected $len;

/** @var string|null Character most recently read by getChar() */
protected $char = null;

/**
 * @var array Stack of "uc" values: parseStartGroup() pushes an entry,
 *            parseEndGroup() pops one, and the "uc" control word
 *            replaces the top entry.
 */
protected $uc = [];

/** @var Group|null Current RTF group */
protected $group;

/** @var Group|null Root group */
public $root = null;
|
||||
|
||||
/**
 * Object constructor.
 *
 * The given content is parsed immediately; there is no try/catch here,
 * so an exception thrown while parsing propagates to the caller.
 *
 * @param string $rtf The RTF content
 */
public function __construct($rtf)
{
    $this->parse($rtf);
}
|
||||
|
||||
/**
 * Read the next character from the RTF stream.
 *
 * The character is stored in $this->char and the read position is
 * advanced by one. $this->char is reset to null before the read, so
 * it is null if the exception below is thrown.
 *
 * @return string The character that was read (same value as $this->char)
 *
 * @throws \Exception When the read position is at or beyond the end of the input
 */
protected function getChar()
{
    $this->char = null;

    if ($this->pos >= strlen($this->rtf)) {
        $err = "Parse error: Tried to read past end of input; RTF is probably truncated.";
        throw new \Exception($err);
    }

    $this->char = $this->rtf[$this->pos++];

    // Returned so the method matches its documented contract;
    // existing callers that ignore the result are unaffected.
    return $this->char;
}
|
||||
|
||||
/**
 * (Helper method) Tells whether the current character is an ASCII
 * letter, i.e. its byte value lies in 65..90 (A-Z) or 97..122 (a-z).
 *
 * @return bool
 */
protected function isLetter()
{
    $code = ord($this->char);

    $isUpperCase = ($code >= 65 && $code <= 90);
    $isLowerCase = ($code >= 97 && $code <= 122);

    return $isUpperCase || $isLowerCase;
}
|
||||
|
||||
/**
 * (Helper method) Tells whether the current character is an ASCII
 * digit, i.e. its byte value lies in 48..57 (0-9).
 *
 * @return bool
 */
protected function isDigit()
{
    $code = ord($this->char);

    return !($code < 48 || $code > 57);
}
|
||||
|
||||
/**
 * (Helper method) Is the current character end-of-line (EOL)?
 *
 * Side effect: when the current character is "\r" or "\n" and the
 * character that follows is also "\r" or "\n" (e.g. a Windows "\r\n"
 * or Acorn "\n\r" pair), that second character is consumed through
 * getChar(), so $this->char then holds the second character.
 *
 * @return bool
 */
protected function isEndOfLine()
{
    if ($this->char != "\r" && $this->char != "\n") {
        return false;
    }

    // Only look ahead when there is a character left to look at;
    // an EOL that is the very last byte of the input must not
    // trigger an out-of-range string offset access.
    if ($this->pos < strlen($this->rtf)) {
        $next = $this->rtf[$this->pos];

        if ($next == "\n" || $next == "\r") {
            $this->getChar();
        }
    }

    return true;
}
|
||||
|
||||
/**
 * (Helper method) Tells whether the current character acts as a
 * delimiter: either a single space or an end-of-line.
 *
 * The end-of-line test is delegated to isEndOfLine() and is only
 * performed when the current character is not a space.
 *
 * @return bool
 */
protected function isSpaceDelimiter()
{
    if ($this->char == " ") {
        return true;
    }

    return $this->isEndOfLine();
}
|
||||
|
||||
/**
 * Open a new group and make it the current one.
 *
 * With no current group, the new group becomes the root and the uc
 * stack is (re)created with the single default value 1. Otherwise the
 * new group is attached as a child of the current group and the top
 * of the uc stack is duplicated for it.
 *
 * @return void
 */
protected function parseStartGroup()
{
    $child = new Group();
    $parent = $this->group;

    if ($parent == null) {
        // No enclosing group: this one is the root.
        $this->root = $child;
        // Create uc stack and insert the first default value
        $this->uc = [1];
    } else {
        // Nest the new group under the current one.
        $child->parent = $parent;
        array_push($parent->children, $child);

        // The nested group starts with the enclosing group's uc value.
        $inherited = end($this->uc);
        array_push($this->uc, $inherited);
    }

    // From now on, parsed elements are added to the new group.
    $this->group = $child;
}
|
||||
|
||||
/**
 * Close the current group: its parent becomes the current group and
 * the uc value belonging to the closed group is dropped from the stack.
 *
 * A closing brace met while no group is open is ignored instead of
 * dereferencing a null group.
 *
 * @return void
 */
protected function parseEndGroup()
{
    // Unmatched "}": there is nothing to close and no uc entry to drop.
    if ($this->group === null) {
        return;
    }

    $this->group = $this->group->parent;

    // Discard the uc value of the group that was just closed
    array_pop($this->uc);
}
|
||||
|
||||
/**
 * Parse ControlWord element
 *
 * Reads the letters of the control word, an optional (possibly
 * negative) decimal parameter and an optional delimiter (space or
 * end-of-line), then appends a ControlWord to the current group.
 *
 * Special cases handled here:
 * - "uc" replaces the value on top of the uc stack with its parameter.
 * - "u" converts a negative parameter to its unsigned 16-bit value and
 *   skips the replacement characters that follow, as many as the
 *   current uc value says.
 *
 * @return void
 *
 * @throws \Exception Raised by getChar() when the input ends prematurely
 */
protected function parseControlWord()
{
    // Read letters until a non-letter is reached.
    $word = '';
    $this->getChar();

    while ($this->isLetter()) {
        $word .= $this->char;
        $this->getChar();
    }

    // Read parameter (if any) consisting of digits.
    // Parameter may be negative, i.e., starting with a '-'
    $parameter = null;
    $negative = false;

    if ($this->char == '-') {
        $this->getChar();
        $negative = true;
    }

    while ($this->isDigit()) {
        if ($parameter === null) {
            $parameter = 0;
        }
        // Explicit cast: the current character is a one-digit string.
        $parameter = $parameter * 10 + (int) $this->char;
        $this->getChar();
    }

    // No parameter present: fall back to 1.
    if ($parameter === null) {
        $parameter = 1;
    }

    // Convert parameter to a negative number when applicable
    if ($negative) {
        $parameter = -$parameter;
    }

    // Update uc value
    if ($word == "uc") {
        array_pop($this->uc);
        $this->uc[] = $parameter;
    }

    // A space/EOL right after the control word belongs to it and is
    // swallowed; any other character is handed back to the stream.
    if (!$this->isSpaceDelimiter()) {
        $this->pos--;
    }

    // If this is \u, then the parameter will be followed
    // by {$this->uc} characters.
    if ($word == "u") {
        // Convert parameter to unsigned decimal unicode
        if ($negative) {
            $parameter = 65536 + $parameter;
        }

        // Will ignore replacement characters $uc times
        $uc = end($this->uc);
        $length = strlen($this->rtf);

        while ($uc > 0) {
            $this->getChar();

            if ($this->char == "\\"
                && $this->pos < $length
                && $this->rtf[$this->pos] == '\''
            ) {
                // The replacement character is encoded as \'hh:
                // jump over the quote and the two hex digits.
                $this->pos = $this->pos + 3;
            } elseif ($this->char == '{' || $this->char == '}') {
                // A group delimiter ends the skippable data. Un-read
                // it so that parse() still opens/closes the group.
                $this->pos--;
                break;
            }

            // - To include an RTF delimiter in skippable data, it must be
            //   represented using the appropriate control symbol (that is,
            //   escaped with a backslash,) as in plain text.
            //
            // - Any RTF control word or symbol is considered a single character
            //   for the purposes of counting skippable characters. For this reason
            //   it's more appropriate to create a $skip flag and let the parse()
            //   function take care of the skippable characters.
            $uc--;
        }
    }

    // Add new RTF word as a child to the current group.
    $rtfword = new ControlWord();
    $rtfword->word = $word;
    $rtfword->parameter = $parameter;
    array_push($this->group->children, $rtfword);
}
|
||||
|
||||
/**
 * Parse ControlSymbol element
 *
 * Reads the single character following the backslash. An end-of-line
 * is recorded as a "par" ControlWord with parameter 0. Any other
 * character is recorded as a ControlSymbol; for the quote symbol (\')
 * the two characters that follow are read as a hexadecimal value and
 * stored as the parameter, otherwise the parameter is 0.
 *
 * @return void
 *
 * @throws \Exception Raised by getChar() when the input ends prematurely
 */
protected function parseControlSymbol()
{
    // The symbol itself is exactly one character.
    $this->getChar();
    $symbol = $this->char;

    // Exceptional case: a backslash followed by EOL stands for \par
    if ($this->isEndOfLine()) {
        $paragraph = new ControlWord();
        $paragraph->word = 'par';
        $paragraph->parameter = 0;
        array_push($this->group->children, $paragraph);

        return;
    }

    // Only \' carries a parameter: a 2-digit hex-code.
    $parameter = 0;

    if ($symbol == '\'') {
        $this->getChar();
        $highDigit = $this->char;

        $this->getChar();
        $lowDigit = $this->char;

        $parameter = hexdec($highDigit . $lowDigit);
    }

    // Append the control symbol to the current group.
    $element = new ControlSymbol();
    $element->symbol = $symbol;
    $element->parameter = $parameter;
    array_push($this->group->children, $element);
}
|
||||
|
||||
/**
 * Parse Control element
 *
 * Called once a backslash has been read. Peeks at the following
 * character without consuming it and dispatches to
 * parseControlWord() when it is a letter, or to
 * parseControlSymbol() otherwise.
 *
 * @return void
 *
 * @throws \Exception Raised by getChar() when the input ends prematurely
 */
protected function parseControl()
{
    // Look ahead by one character, then step back so the chosen
    // parser reads that same character again.
    $this->getChar();
    $this->pos--;

    if (!$this->isLetter()) {
        // Anything but a letter starts a control symbol.
        $this->parseControlSymbol();

        return;
    }

    // A letter starts a control word.
    $this->parseControlWord();
}
|
||||
|
||||
/**
 * Parse Text element
 *
 * Collects plain text starting at the current character and stops at
 * an unescaped backslash or brace, which is handed back to the stream
 * for the caller. The escapes \\, \{ and \} contribute the escaped
 * character to the text; "\r" and "\n" are skipped.
 *
 * NOTE(review): when the loop ends because the read position reached
 * the end of the input, the character fetched by the last getChar()
 * call has not been appended to the text.
 *
 * @return void
 *
 * @throws \Exception When there is no current group to hold the text,
 *                    or from getChar() when the input ends prematurely
 */
protected function parseText()
{
    $buffer = '';
    $terminate = false;

    do {
        $isLineBreak = ($this->char == "\r" || $this->char == "\n");

        if ($isLineBreak) {
            // EOL characters are not part of the text: move on.
            $this->getChar();
        } else {
            if ($this->char == "\\") {
                // Look at the next character to find out whether
                // this backslash escapes a literal \, { or }.
                $this->getChar();

                if (!in_array($this->char, ["\\", '{', '}'], true)) {
                    // Not an escape: rewind to the backslash itself.
                    $this->pos = $this->pos - 2;
                    $terminate = true;
                }
            } elseif ($this->char == '{' || $this->char == '}') {
                // Unescaped brace: give it back and stop.
                $this->pos--;
                $terminate = true;
            }

            if (!$terminate) {
                // Keep the character and fetch the next one.
                $buffer .= $this->char;
                $this->getChar();
            }
        }
    } while (!$terminate && $this->pos < $this->len);

    // Wrap the collected characters in a Text element.
    $element = new Text($buffer);

    // Text that does not belong to any group means the input
    // is not a valid RTF file.
    if ($this->group == null) {
        throw new \Exception("Parse error: RTF text outside of group.");
    }

    // Append the text to the current group.
    array_push($this->group->children, $element);
}
|
||||
|
||||
/**
 * Attempt to parse an RTF string.
 *
 * Resets the parser state, then reads the input character by
 * character: "{" opens a group, "}" closes one, a backslash starts a
 * control word or symbol, "\r" and "\n" are ignored, and anything
 * else starts a run of text.
 *
 * NOTE(review): the loop only starts a new element while the read
 * position is below length - 1, so it never reads the final character
 * of the input itself — presumably intentional; confirm before changing.
 *
 * @param string $rtf RTF content
 *
 * @return void
 *
 * @throws \Exception Propagated from the element parsers on malformed input
 */
protected function parse($rtf)
{
    // Reset the parser state for the new input.
    $this->root = null;
    $this->group = null;
    $this->pos = 0;
    $this->rtf = $rtf;
    $this->len = strlen($this->rtf);

    while ($this->pos < $this->len - 1) {
        $this->getChar();

        // Bare line breaks carry no meaning here.
        if ($this->char == "\n" || $this->char == "\r") {
            continue;
        }

        // Dispatch on the kind of character that was read.
        if ($this->char == '{') {
            $this->parseStartGroup();
        } elseif ($this->char == '}') {
            $this->parseEndGroup();
        } elseif ($this->char == "\\") {
            $this->parseControl();
        } else {
            $this->parseText();
        }
    }
}
|
||||
|
||||
/**
 * Returns string representation of the document for debug purposes.
 *
 * Delegates to the root group's toString(); when there is no root
 * group, a fixed placeholder message is returned instead.
 *
 * @return string
 */
public function __toString()
{
    return $this->root
        ? $this->root->toString()
        : "No root group";
}
|
||||
}
|
||||
Reference in New Issue
Block a user