Import Ruty

This commit is contained in:
2024-03-11 00:57:00 +01:00
parent 50481b23df
commit 34a31bb184
617 changed files with 106612 additions and 0 deletions
@@ -0,0 +1,21 @@
<?php
namespace RtfHtmlPhp;
class ControlSymbol extends Element
{
    public $symbol;
    public $parameter = 0;

    /**
     * Renders this control symbol as a single line of the debug dump.
     *
     * @param int $level Nesting depth; the line is prefixed with that many spaces
     *
     * @return string
     */
    public function toString($level)
    {
        $indent = str_repeat(" ", $level);

        return $indent . "SYMBOL " . $this->symbol . " (" . $this->parameter . ")\n";
    }
}
@@ -0,0 +1,21 @@
<?php
namespace RtfHtmlPhp;
class ControlWord extends Element
{
    public $word;
    public $parameter;

    /**
     * Renders this control word as a single line of the debug dump.
     *
     * @param int $level Nesting depth; the line is prefixed with that many spaces
     *
     * @return string
     */
    public function toString($level)
    {
        $indent = str_repeat(" ", $level);

        return $indent . "WORD " . $this->word . " (" . $this->parameter . ")\n";
    }
}
@@ -0,0 +1,415 @@
<?php
namespace RtfHtmlPhp;
class Document
{
/** @var string RTF string being parsed */
protected $rtf;
/** @var int Current position in RTF string */
protected $pos;
/** @var int Length of RTF string */
protected $len;
/** @var string|null Character most recently read by getChar() */
protected $char;
/**
 * @var int[] Stack of \uc values (number of replacement characters skipped
 *            after a \u keyword); one entry is pushed per opened group and
 *            popped when the group closes
 */
protected $uc = [];
/** @var Group Current RTF group */
protected $group;
/** @var Group Root group */
public $root = null;
/**
* Object constructor. The given RTF content is parsed immediately.
*
* @param string $rtf The RTF content
*/
public function __construct($rtf)
{
$this->parse($rtf);
}
/**
 * Get the next character from the RTF stream and advance the read position.
 * The character is stored in $this->char and is also returned.
 * Parsing is aborted when reading beyond end of input string.
 *
 * @return string The character that was read
 *
 * @throws \Exception When the read position is at or past the end of the input
 */
protected function getChar()
{
    $this->char = null;
    if ($this->pos >= strlen($this->rtf)) {
        $err = "Parse error: Tried to read past end of input; RTF is probably truncated.";
        throw new \Exception($err);
    }
    $this->char = $this->rtf[$this->pos++];

    return $this->char;
}
/**
 * (Helper method) Tells whether the current character is an ASCII letter
 * (A-Z or a-z).
 *
 * @return bool
 */
protected function isLetter()
{
    $code = ord($this->char);
    $isUpper = $code >= 65 && $code <= 90;
    $isLower = $code >= 97 && $code <= 122;

    return $isUpper || $isLower;
}
/**
 * (Helper method) Tells whether the current character is an ASCII digit (0-9).
 *
 * @return bool
 */
protected function isDigit()
{
    $code = ord($this->char);
    if ($code < 48) {
        return false;
    }

    return $code <= 57;
}
/**
 * (Helper method) Is the current character end-of-line (EOL)?
 *
 * When the current character is CR or LF and the next character in the
 * input is also CR or LF, that next character is consumed as well, so a
 * two-character line ending counts as a single EOL.
 *
 * @return bool
 */
protected function isEndOfLine()
{
    if ($this->char != "\r" && $this->char != "\n") {
        return false;
    }
    // Checks for a Windows/Acorn type EOL. Only peek when there is a next
    // character: the EOL may be the very last byte of the input.
    if (isset($this->rtf[$this->pos])) {
        $next = $this->rtf[$this->pos];
        if ($next == "\n" || $next == "\r") {
            $this->getChar();
        }
    }

    return true;
}
/**
 * (Helper method) Tells whether the current character delimits a control
 * word: either a single space or an end-of-line.
 *
 * @return bool
 */
protected function isSpaceDelimiter()
{
    if ($this->char == " ") {
        return true;
    }

    return $this->isEndOfLine();
}
/**
 * Opens a new group and makes it the current one.
 *
 * The first group opened while no group is current becomes the root and
 * starts a fresh uc stack holding the value 1. Any other group is attached
 * as a child of the current group and receives a copy of the current uc
 * value on the stack.
 *
 * @return void
 */
protected function parseStartGroup()
{
    $opened = new Group();

    if ($this->group == null) {
        // No enclosing group: this one is the root.
        $this->root = $opened;
        $this->uc = [1];
    } else {
        // Nest the new group below the current one.
        $opened->parent = $this->group;
        $this->group->children[] = $opened;
        $this->uc[] = end($this->uc);
    }

    $this->group = $opened;
}
/**
 * Retrieve state of document from stack: the parent of the current group
 * becomes the current group again and the group's uc value is discarded.
 *
 * A closing brace seen while no group is open is ignored, so a stray '}'
 * does not dereference a null group.
 *
 * @return void
 */
protected function parseEndGroup()
{
    if ($this->group === null) {
        // Unmatched closing brace: nothing to close.
        return;
    }
    $this->group = $this->group->parent;
    // Retrieve last uc value from stack
    array_pop($this->uc);
}
/**
 * Parse ControlWord element.
 *
 * Reads the letters of the control word, an optional (possibly negative)
 * numeric parameter and the optional space delimiter, then appends a
 * ControlWord to the current group. Two words get extra treatment:
 *  - \uc replaces the top of the uc stack with its parameter;
 *  - \u skips the replacement characters that follow it (as many as the
 *    current uc value) and maps a negative parameter into 0..65535.
 *
 * @return void
 *
 * @throws \Exception When the control word appears outside of any group,
 *                    or when the input ends in the middle of the word
 */
protected function parseControlWord()
{
    if ($this->group === null) {
        throw new \Exception("Parse error: RTF control word outside of group.");
    }

    // Read letters until a non-letter is reached.
    $word = '';
    $this->getChar();
    while ($this->isLetter()) {
        $word .= $this->char;
        $this->getChar();
    }

    // Read parameter (if any) consisting of digits.
    // Parameter may be negative, i.e., starting with a '-'
    $parameter = null;
    $negative = false;
    if ($this->char == '-') {
        $this->getChar();
        $negative = true;
    }
    while ($this->isDigit()) {
        if ($parameter === null) {
            $parameter = 0;
        }
        $parameter = $parameter * 10 + (int) $this->char;
        $this->getChar();
    }

    // If no parameter is present, use 1.
    if ($parameter === null) {
        $parameter = 1;
    }

    // Convert parameter to a negative number when applicable
    if ($negative) {
        $parameter = -$parameter;
    }

    // Update uc value
    if ($word == "uc") {
        array_pop($this->uc);
        $this->uc[] = $parameter;
    }

    // Skip space delimiter; any other character belongs to what follows
    // and is handed back to the stream.
    if (!$this->isSpaceDelimiter()) {
        $this->pos--;
    }

    // If this is \u, then the parameter will be followed
    // by {$this->uc} characters.
    if ($word == "u") {
        // Convert parameter to unsigned decimal unicode
        if ($negative) {
            $parameter = 65536 + $parameter;
        }

        // Will ignore replacement characters $uc times
        $uc = end($this->uc);
        while ($uc > 0) {
            $this->getChar();
            $nextIsQuote = isset($this->rtf[$this->pos]) && $this->rtf[$this->pos] == '\'';
            if ($this->char == "\\" && $nextIsQuote) {
                // The replacement character is encoded as a hexadecimal
                // value \'hh: jump over the quote and both hex digits.
                $this->pos = $this->pos + 3;
            } elseif ($this->char == '{' || $this->char == '}') {
                // An RTF scope delimiter ends the skippable data. Hand the
                // delimiter back so the main loop opens/closes the group.
                $this->pos--;
                break;
            }
            // - To include an RTF delimiter in skippable data, it must be
            //   represented using the appropriate control symbol (that is,
            //   escaped with a backslash,) as in plain text.
            //
            // - Any RTF control word or symbol is considered a single character
            //   for the purposes of counting skippable characters. For this reason
            //   it's more appropriate to create a $skip flag and let the parse()
            //   function take care of the skippable characters.
            $uc--;
        }
    }

    // Add new RTF word as a child to the current group.
    $rtfword = new ControlWord();
    $rtfword->word = $word;
    $rtfword->parameter = $parameter;
    array_push($this->group->children, $rtfword);
}
/**
 * Parse ControlSymbol element.
 *
 * A backslash followed by an end-of-line is stored as a \par control word
 * instead of a symbol. For the \' symbol the two characters that follow are
 * read as a hexadecimal number and stored as the symbol's parameter; every
 * other symbol gets the parameter 0.
 *
 * @return void
 */
protected function parseControlSymbol()
{
    // A symbol is exactly one character long.
    $this->getChar();
    $symbol = $this->char;

    // Exceptional case: an escaped EOL stands for \par.
    if ($this->isEndOfLine()) {
        $paragraph = new ControlWord();
        $paragraph->word = 'par';
        $paragraph->parameter = 0;
        array_push($this->group->children, $paragraph);
        return;
    }

    $parameter = 0;
    if ($symbol == '\'') {
        // \'hh: collect the two characters that follow and decode them as hex.
        $this->getChar();
        $hex = $this->char;
        $this->getChar();
        $hex = $hex . $this->char;
        $parameter = hexdec($hex);
    }

    $element = new ControlSymbol();
    $element->symbol = $symbol;
    $element->parameter = $parameter;
    array_push($this->group->children, $element);
}
/**
 * Parse Control element: dispatches to the control word or the control
 * symbol parser.
 *
 * @return void
 */
protected function parseControl()
{
    // Peek at the character after the backslash, then step back so the
    // specialised parser reads it again.
    $this->getChar();
    $this->pos--;

    // A letter starts a control word; anything else is a control symbol.
    if (!$this->isLetter()) {
        $this->parseControlSymbol();
        return;
    }

    $this->parseControlWord();
}
/**
* Parse Text element.
*
* Starts at the character already held in $this->char and collects plain
* text until an unescaped backslash or a brace is met. CR and LF characters
* are dropped. The collected text is appended to the current group as a
* Text element.
*
* @return void
*
* @throws \Exception When there is no current group to receive the text
*/
protected function parseText()
{
// Parse plain text up to backslash or brace,
// unless escaped.
$text = '';
$terminate = false;
do {
// Ignore EOL characters
// ("continue" jumps to the loop condition at the bottom)
if ($this->char == "\r" || $this->char == "\n") {
$this->getChar();
continue;
}
// Is this an escape?
if ($this->char == "\\") {
// Perform lookahead to see if this
// is really an escape sequence.
$this->getChar();
switch ($this->char) {
// For \\, \{ and \} the escaped character is now the current
// character and is appended to the text below.
case "\\":
break;
case '{':
break;
case '}':
break;
default:
// Not an escape. Roll back.
// Two positions: the backslash will be the next character read.
$this->pos = $this->pos - 2;
$terminate = true;
break;
}
} elseif ($this->char == '{' || $this->char == '}') {
// Unescaped brace: step back one position so it is read again
// after this method returns.
$this->pos--;
$terminate = true;
}
if (!$terminate) {
// Save plain text
$text .= $this->char;
$this->getChar();
}
} while (!$terminate && $this->pos < $this->len);
// Create new Text element:
$text = new Text($text);
// If there is no current group, then this is not a valid RTF file.
// Throw an exception.
if ($this->group == null) {
throw new \Exception("Parse error: RTF text outside of group.");
}
// Add text as a child to the current group:
array_push($this->group->children, $text);
}
/**
 * Attempt to parse an RTF string into a tree of groups and elements.
 *
 * Resets the parser state, then reads the input character by character and
 * dispatches on it: '{' opens a group, '}' closes one, a backslash starts a
 * control word or symbol, and anything else starts plain text. CR and LF
 * between elements are skipped. The loop stops before the last character of
 * the input.
 *
 * @param string $rtf RTF content
 *
 * @return void
 */
protected function parse($rtf)
{
    $this->rtf = $rtf;
    $this->len = strlen($this->rtf);
    $this->pos = 0;
    $this->root = null;
    $this->group = null;

    while ($this->pos < $this->len - 1) {
        $this->getChar();

        // Line breaks between elements carry no meaning.
        if ($this->char == "\n" || $this->char == "\r") {
            continue;
        }

        if ($this->char == '{') {
            $this->parseStartGroup();
        } elseif ($this->char == '}') {
            $this->parseEndGroup();
        } elseif ($this->char == "\\") {
            $this->parseControl();
        } else {
            $this->parseText();
        }
    }
}
/**
 * Returns the debug dump of the parsed tree, or a short notice when the
 * document has no root group.
 *
 * @return string
 */
public function __toString()
{
    return $this->root ? $this->root->toString() : "No root group";
}
}
@@ -0,0 +1,11 @@
<?php
namespace RtfHtmlPhp;
/**
* Element is the parent class of all RTF elements:
* Group, ControlWord, ControlSymbol and Text.
*/
class Element
{
}
+102
View File
@@ -0,0 +1,102 @@
<?php
namespace RtfHtmlPhp;
class Group extends Element
{
    public $parent;
    public $children;

    /**
     * Creates an empty group that is not attached to any parent.
     */
    public function __construct()
    {
        $this->children = [];
        $this->parent = null;
    }

    /**
     * Returns the group type, which is derived from the first child:
     * the word of a leading control word, '*' for a leading '*' control
     * symbol, and null in every other case (including an empty group).
     *
     * @return string|null
     */
    public function getType()
    {
        if (count($this->children) == 0) {
            return null;
        }

        $first = $this->children[0];

        if ($first instanceof ControlWord) {
            return $first->word;
        }

        if ($first instanceof ControlSymbol && $first->symbol == '*') {
            return '*';
        }

        return null;
    }

    /**
     * Tells whether the group is a destination group, i.e. whether its
     * first child is the '*' control symbol.
     *
     * @return bool|null Null when the group is empty or its first child is
     *                   not a control symbol; otherwise whether that symbol is '*'
     */
    public function isDestination()
    {
        if (count($this->children) == 0) {
            return null;
        }

        $first = $this->children[0];

        return ($first instanceof ControlSymbol) ? $first->symbol == '*' : null;
    }

    /**
     * Returns string representation of the group and all of its children
     * for debug purposes.
     *
     * @param int $level Indentation level
     *
     * @return string
     */
    public function toString($level = 0)
    {
        $indent = str_repeat(" ", $level);
        $depth = $level + 1;

        $lines = array_map(
            function ($child) use ($depth) {
                return $child->toString($depth);
            },
            $this->children
        );

        return $indent . "{\n" . implode('', $lines) . $indent . "}\n";
    }
}
@@ -0,0 +1,35 @@
<?php
namespace RtfHtmlPhp\Html;
class Font
{
    /** @var string|null Generic CSS font family (e.g. "serif") */
    public $family;
    /** @var string|null Font name */
    public $name;
    /** @var string|null Encoding name derived from the font's charset */
    public $charset;
    /** @var string|null Encoding name derived from the font's code page */
    public $codepage;
    /** @var int|null Font pitch (parameter of the \fprq control word) */
    public $fprq;

    /**
     * Returns font style (font-family) string
     *
     * The font name comes first, followed by the generic family; entries
     * that are not set are left out.
     *
     * @return string A string including font-family: prefix. An empty string if font is not set
     */
    public function toStyle()
    {
        // array_filter() without a callback drops the unset/empty entries.
        $list = array_filter([$this->name, $this->family]);

        if (count($list) == 0) {
            return '';
        }

        return "font-family:" . implode(',', $list);
    }
}
@@ -0,0 +1,740 @@
<?php
namespace RtfHtmlPhp\Html;
use RtfHtmlPhp\Document;
class HtmlFormatter
{
/** @var string HTML produced so far */
protected $output = '';
/** @var string Output encoding ('HTML-ENTITIES' or an mbstring encoding name) */
protected $encoding;
/** @var int|null Default font number (parameter of \deff) */
protected $defaultFont;
/** @var bool Whether the document declared \fromhtml (encapsulated HTML) */
protected $fromhtml = false;
/** @var State|null Copy of the state that was active when the current span was opened */
protected $previousState;
/** @var State[] Stack of formatting states, one per group being processed */
protected $states = [];
/** @var State|null Formatting state currently in effect */
protected $state;
/** @var array Open/closed flags of the 'span' and 'p' tags */
protected $openedTags = ['span' => false, 'p' => null];
/** @var string|null Encoding of \'hh characters; ANSI (CP1252) until the document says otherwise */
protected $rtfEncoding = 'CP1252';
/**
 * Object constructor.
 *
 * By default, HtmlFormatter uses HTML-ENTITIES for code conversion.
 * You can optionally request a different encoding when creating
 * the HtmlFormatter instance; it must be one of the encodings listed
 * by the mbstring extension.
 *
 * @param string $encoding Output encoding
 *
 * @throws \Exception When mbstring is not loaded or the encoding is not supported
 */
public function __construct($encoding = 'HTML-ENTITIES')
{
    if (!extension_loaded('mbstring')) {
        throw new \Exception("PHP mbstring extension not enabled");
    }

    // 'HTML-ENTITIES' is always accepted; anything else must be known to mbstring.
    $needsLookup = ($encoding != 'HTML-ENTITIES');
    if ($needsLookup && !in_array($encoding, mb_list_encodings())) {
        throw new \Exception("Unsupported encoding: $encoding");
    }

    $this->encoding = $encoding;
}
/**
 * Generates HTML output for the document
 *
 * All per-document state is reset first, so one formatter instance can be
 * used for several documents without settings of an earlier document
 * (encapsulated-HTML flag, default font, source encoding, font and color
 * tables) leaking into the next one.
 *
 * @param Document $document The document
 *
 * @return string HTML content; an empty string when the document has no root group
 */
public function format(Document $document)
{
    // Clear current output
    $this->output = '';
    // Forget what a previously formatted document declared
    $this->fromhtml = false;
    $this->defaultFont = null;
    $this->rtfEncoding = 'CP1252'; // ANSI is the RTF default character set
    State::$fonttbl = [];
    State::$colortbl = [];
    // Keep track of style modifications
    $this->previousState = null;
    // and create a stack of states
    $this->states = [];
    // Put an initial standard state onto the stack
    $this->state = new State();
    array_push($this->states, $this->state);
    // Keep track of opened html tags
    $this->openedTags = ['span' => false, 'p' => null];

    // Nothing was parsed: there is nothing to format.
    if ($document->root === null) {
        return $this->output;
    }

    // Begin format
    $this->processGroup($document->root);
    // Instead of removing opened tags, we close them
    $this->output .= $this->openedTags['span'] ? '</span>' : '';
    $this->output .= $this->openedTags['p'] ? '</p>' : '';
    // Remove extra empty paragraph at the end
    // TODO: Find the real reason it's there and fix it
    $this->output = preg_replace('|<p></p>$|', '', $this->output);

    return $this->output;
}
/**
 * Registers a font definition.
 *
 * The font number, family, charset, code page and pitch are taken from the
 * control words of the group; the font name is taken from its text
 * children. The resulting Font is stored in the font table under the font
 * number (0 when the group has no \f word).
 *
 * @param \RtfHtmlPhp\Group $fontGroup A group element with a font definition
 *
 * @return void
 */
protected function loadFont(\RtfHtmlPhp\Group $fontGroup)
{
    $fontNumber = 0;
    $font = new Font();
    $name = '';

    foreach ($fontGroup->children as $child) {
        if ($child instanceof \RtfHtmlPhp\ControlWord) {
            switch ($child->word) {
                case 'f':
                    $fontNumber = $child->parameter;
                    break;
                // Font family names
                case 'froman':
                    $font->family = "serif";
                    break;
                case 'fswiss':
                    $font->family = "sans-serif";
                    break;
                case 'fmodern':
                    $font->family = "monospace";
                    break;
                case 'fscript':
                    $font->family = "cursive";
                    break;
                case 'fdecor':
                    $font->family = "fantasy";
                    break;
                // fnil (default font), ftech (symbol) and fbidi
                // (bidirectional font) have no CSS counterpart here.
                case 'fcharset': // charset
                    $font->charset = $this->getEncodingFromCharset($child->parameter);
                    break;
                case 'cpg': // code page
                    $font->codepage = $this->getEncodingFromCodepage($child->parameter);
                    break;
                case 'fprq': // Font pitch
                    $font->fprq = $child->parameter;
                    break;
            }
        } elseif ($child instanceof \RtfHtmlPhp\Text) {
            // The name may arrive in several text pieces (for example when a
            // subgroup such as {\*\falt ...} sits between the name and the
            // closing ';'), so pieces are joined instead of overwritten.
            // Only the ';' delimiter is removed; a piece without it is kept whole.
            $name .= rtrim($child->text, ';');
        }
        // Subgroups (\falt, \fontemb, \fontfile, \panose) and control
        // symbols are not evaluated.
    }

    if ($name !== '') {
        $font->name = $name;
    }

    State::setFont($fontNumber, $font);
}
/**
 * Loads every font definition of a font table group.
 *
 * Layout of the group:
 *   '{' \fonttbl (<fontinfo> | ('{' <fontinfo> '}'))+ '}'
 *   <fontinfo>: <fontnum><fontfamily><fcharset>?<fprq>?<panose>?
 *               <nontaggedname>?<fontemb>?<codepage>? <fontname><fontaltname>? ';'
 *
 * Only the subgroups are evaluated; every other child (such as the
 * "fonttbl" word itself) is skipped.
 *
 * @param \RtfHtmlPhp\Group $fontTblGrp The font table group
 *
 * @return void
 */
protected function extractFontTable($fontTblGrp)
{
    foreach ($fontTblGrp->children as $entry) {
        if ($entry instanceof \RtfHtmlPhp\Group) {
            // Each subgroup holds one font specification.
            $this->loadFont($entry);
        }
    }
}
/**
 * Builds the color table from the children of a color table group,
 * e.g. {\colortbl;\red0\green0\blue0;}
 *
 * Child 0 is skipped. A control word is taken as the first of three
 * consecutive words carrying the red, green and blue values; a text child
 * (the ';' delimiter) stores the color read last. A delimiter at index 1
 * stands for the 'auto' color and is stored as 0.
 *
 * @param array $colorTblGrp Children of the color table group
 *
 * @return void
 */
protected function extractColorTable($colorTblGrp)
{
    $palette = [];
    $current = '';
    $total = count($colorTblGrp);
    $idx = 1;

    while ($idx < $total) {
        $entry = $colorTblGrp[$idx];

        if ($entry instanceof \RtfHtmlPhp\ControlWord) {
            // Extract RGB color and convert it to hex string
            $current = sprintf(
                '#%02x%02x%02x',
                $entry->parameter,                   // red
                $colorTblGrp[$idx + 1]->parameter,   // green
                $colorTblGrp[$idx + 2]->parameter    // blue
            );
            // Step over the green and blue words as well.
            $idx += 2;
        } elseif ($entry instanceof \RtfHtmlPhp\Text) {
            // ';' delimiter: index 1 is the 'auto' color, any other
            // position closes the color extracted before it.
            $palette[] = ($idx != 1) ? $current : 0;
        }

        $idx++;
    }

    State::$colortbl = $palette;
}
/**
 * Builds an Image from the children of a picture group and appends its
 * HTML to the output.
 *
 * Control words select the picture format or set size, scaling and
 * binary-size values; a text child supplies the image data. The
 * "wmetafile" word is not mapped to a format.
 *
 * @param array $pictGrp Children of the picture group
 *
 * @return void
 */
protected function extractImage($pictGrp)
{
    // Control word => picture format
    $formats = [
        'emfblip'  => 'emf',
        'pngblip'  => 'png',
        'jpegblip' => 'jpeg',
        'macpict'  => 'pict',
    ];
    // Control word => Image property that receives the word's parameter
    $numeric = [
        'picw'      => 'width',
        'pich'      => 'height',
        'picwgoal'  => 'goalWidth',
        'pichgoal'  => 'goalHeight',
        'picscalex' => 'pcScaleX',
        'picscaley' => 'pcScaleY',
        'bin'       => 'binarySize',
    ];

    $image = new Image();

    foreach ($pictGrp as $child) {
        if ($child instanceof \RtfHtmlPhp\ControlWord) {
            $keyword = $child->word;
            if (isset($formats[$keyword])) {
                $image->format = $formats[$keyword];
            } elseif (isset($numeric[$keyword])) {
                $image->{$numeric[$keyword]} = $child->parameter;
            }
        } elseif ($child instanceof \RtfHtmlPhp\Text) {
            // store Data
            $image->imageData = $child->text;
        }
    }

    // output Image
    $this->output .= $image->printImage();
}
/**
 * Formats one group.
 *
 * Font table, color table, picture and destination groups are handed to
 * their dedicated handlers; stylesheet, info and nonshppict groups are
 * ignored. Every other group is formatted child by child with its own copy
 * of the current state, which is dropped again when the group ends.
 *
 * @param \RtfHtmlPhp\Group $group The group to format
 *
 * @return void
 */
protected function processGroup($group)
{
    switch ($group->getType()) {
        case "stylesheet": // Stylesheet extraction not yet supported
        case "info":       // Ignore Document information
        case "nonshppict": // Ignore alternative images
            return;
        case "fonttbl":
            $this->extractFontTable($group);
            return;
        case "colortbl":
            $this->extractColorTable($group->children);
            return;
        case "pict":
            $this->extractImage($group->children);
            return;
        case "*":
            $this->processDestination($group->children);
            return;
    }

    // Enter the group: work on a copy of the current state.
    $this->state = clone $this->state;
    $this->states[] = $this->state;

    foreach ($group->children as $child) {
        $this->formatEntry($child);
    }

    // Leave the group: the state below becomes current again.
    array_pop($this->states);
    $top = count($this->states) - 1;
    $this->state = $this->states[$top];
}
/**
 * Processes the children of a destination group ({\* ...}).
 *
 * Only two destinations are handled, identified by the control word that
 * follows the '*' symbol:
 *  - shppict: every following entry is formatted normally;
 *  - htmltag: text entries are appended to the output verbatim, all other
 *    entries are formatted normally.
 * Any other destination, or a group without a second child, is ignored.
 *
 * @param array $dest Children of the destination group
 *
 * @return void
 */
protected function processDestination($dest)
{
    // A destination consisting of nothing but '*' has no second child.
    if (!isset($dest[1]) || !$dest[1] instanceof \RtfHtmlPhp\ControlWord) {
        return;
    }

    $keyword = $dest[1]->word;
    // Everything after the '*' symbol and the destination keyword
    $entries = array_slice($dest, 2);

    if ($keyword == "shppict") {
        // Word 97 picture
        foreach ($entries as $entry) {
            $this->formatEntry($entry);
        }
    } elseif ($keyword == "htmltag") {
        foreach ($entries as $entry) {
            if ($entry instanceof \RtfHtmlPhp\Text) {
                $this->output .= $entry->text;
            } else {
                $this->formatEntry($entry);
            }
        }
    }
}
/**
 * Dispatches one element to the handler for its type. Elements that are
 * neither group, control word, control symbol nor text are ignored.
 *
 * @param \RtfHtmlPhp\Element $entry The element to format
 *
 * @return void
 */
protected function formatEntry($entry)
{
    if ($entry instanceof \RtfHtmlPhp\Group) {
        $this->processGroup($entry);
        return;
    }
    if ($entry instanceof \RtfHtmlPhp\ControlWord) {
        $this->formatControlWord($entry);
        return;
    }
    if ($entry instanceof \RtfHtmlPhp\ControlSymbol) {
        $this->formatControlSymbol($entry);
        return;
    }
    if ($entry instanceof \RtfHtmlPhp\Text) {
        $this->formatText($entry);
    }
}
/**
 * Applies one control word: updates the current state or document
 * settings, or writes the character or paragraph break it stands for.
 * Control words that are not listed are ignored.
 *
 * @param \RtfHtmlPhp\ControlWord $word The control word
 *
 * @return void
 */
protected function formatControlWord($word)
{
    // Special characters that become one HTML entity. In encapsulated
    // HTML (\fromhtml) an empty string is written instead.
    $entities = [
        'lquote'    => "&lsquo;", // &#145; &#8216;
        'rquote'    => "&rsquo;", // &#146; &#8217;
        'ldblquote' => "&ldquo;", // &#147; &#8220;
        'rdblquote' => "&rdquo;", // &#148; &#8221;
        'bullet'    => "&bull;",  // &#149; &#8226;
        'endash'    => "&ndash;", // &#150; &#8211;
        'emdash'    => "&mdash;", // &#151; &#8212;
        'enspace'   => "&ensp;",  // &#8194;
        'emspace'   => "&emsp;",  // &#8195;
    ];
    if (isset($entities[$word->word])) {
        $this->write($this->fromhtml ? "" : $entities[$word->word]);
        return;
    }

    switch ($word->word) {
        // Encapsulated HTML
        case 'fromhtml':
            $this->fromhtml = $word->parameter > 0;
            break;
        case 'htmlrtf':
            $this->state->htmlrtf = $word->parameter > 0;
            break;

        // Resets
        case 'plain': // Reset font formatting properties to default.
        case 'pard':  // Reset to default paragraph properties.
            $this->state->reset($this->defaultFont);
            break;

        // Font formatting properties
        case 'b':
            $this->state->bold = $word->parameter;
            break;
        case 'i':
            $this->state->italic = $word->parameter;
            break;
        case 'ul':
            $this->state->underline = $word->parameter;
            break;
        case 'ulnone':
            $this->state->underline = false;
            break;
        case 'strike':
            $this->state->strike = $word->parameter;
            break;
        case 'v': // hidden
            $this->state->hidden = $word->parameter;
            break;
        case 'fs': // Font size
            $this->state->fontsize = ceil(($word->parameter / 24) * 16);
            break;
        case 'f': // Font
            $this->state->font = $word->parameter;
            break;
        case 'deff': // Store default font
            $this->defaultFont = $word->parameter;
            break;

        // Colors
        case 'cf':
        case 'chcfpat':
            $this->state->fontcolor = $word->parameter;
            break;
        case 'cb':
        case 'chcbpat':
            $this->state->background = $word->parameter;
            break;
        case 'highlight':
            $this->state->hcolor = $word->parameter;
            break;

        // Whitespace characters
        case 'tab': // Character value 9
            $this->write($this->fromhtml ? "\t" : "&nbsp;");
            break;
        case 'line': // appended to the output directly, not through write()
            $this->output .= $this->fromhtml ? "\n" : "<br/>";
            break;

        // Unicode characters
        case 'u':
            $uchar = $this->decodeUnicode($word->parameter);
            $this->write($uchar);
            break;

        // Paragraphs
        case 'par':
        case 'row':
            if ($this->fromhtml) {
                $this->output .= "\n";
                break;
            }
            // Close previously opened tags
            $this->closeTags();
            // Begin a new paragraph
            $this->openTag('p');
            break;

        // Code pages
        case 'ansi':
        case 'mac':
        case 'pc':
        case 'pca':
            $this->rtfEncoding = $this->getEncodingFromCodepage($word->word);
            break;
        case 'ansicpg':
            if ($word->parameter) {
                $this->rtfEncoding = $this->getEncodingFromCodepage($word->parameter);
            }
            break;
    }
}
/**
 * Converts a character code into its representation in the output encoding.
 *
 * When $srcEnc names an encoding other than UTF-8, $code is taken as a
 * single byte in that encoding and converted with iconv; if no source
 * encoding is given or the conversion fails, $code is taken as a Unicode
 * code point.
 *
 * The code point is turned into text through UCS-4BE rather than through
 * mbstring's 'HTML-ENTITIES' pseudo-encoding, which is deprecated since
 * PHP 8.2.
 *
 * @param int         $code   Character code
 * @param string|null $srcEnc Encoding the code belongs to (iconv name)
 *
 * @return string A numeric entity for 'HTML-ENTITIES' output, otherwise the
 *                character in the output encoding
 */
protected function decodeUnicode($code, $srcEnc = 'UTF-8')
{
    $utf8 = false;
    if ($srcEnc && $srcEnc != 'UTF-8') { // convert character to Unicode
        $utf8 = iconv($srcEnc, 'UTF-8', chr($code));
    }

    if ($this->encoding == 'HTML-ENTITIES') {
        $codePoint = ($utf8 !== false) ? $this->ordUtf8($utf8) : $code;
        // ordUtf8() yields null for byte sequences it cannot decode
        if ($codePoint === null) {
            $codePoint = $code;
        }
        return "&#{$codePoint};";
    }

    if ($utf8 === false) {
        // Unicode code point -> UTF-8 character
        $utf8 = mb_convert_encoding(pack('N', $code), 'UTF-8', 'UCS-4BE');
    }

    if ($this->encoding == 'UTF-8') {
        return $utf8;
    }

    return mb_convert_encoding($utf8, $this->encoding, 'UTF-8');
}
/**
 * Appends text to the output, opening the paragraph and span tags it needs.
 *
 * Nothing is written while the state is flagged htmlrtf. In encapsulated
 * HTML (\fromhtml) the text is appended as is. Otherwise the first
 * paragraph is opened on demand and a new span is started whenever the
 * style changed or no span is open.
 *
 * The style string contains font names taken from the document, so it is
 * HTML-escaped before it is placed inside the style attribute.
 *
 * @param string $txt Text to append (already encoded for output)
 *
 * @return void
 */
protected function write($txt)
{
    // Ignore regions that are not part of the original (encapsulated) HTML content
    if ($this->state->htmlrtf) {
        return;
    }

    if ($this->fromhtml) {
        $this->output .= $txt;
        return;
    }

    if ($this->openedTags['p'] === null) {
        // Create the first paragraph
        $this->openTag('p');
    }

    // Create a new 'span' element only when a style change occurs, or when
    // the style is unchanged but the span was closed in the meantime
    // (e.g. because of an end of paragraph).
    if (!$this->openedTags['span'] || !$this->state->equals($this->previousState)) {
        // If applicable close previously opened 'span' tag
        $this->closeTag('span');

        $style = $this->state->printStyle();

        // Keep track of preceding style
        $this->previousState = clone $this->state;

        // Create style attribute and open span
        $attr = '';
        if ($style) {
            $escaped = htmlspecialchars($style, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $attr = "style=\"{$escaped}\"";
        }
        $this->openTag('span', $attr);
    }

    $this->output .= $txt;
}
/**
 * Writes an opening tag and marks the tag as open. Does nothing in
 * encapsulated HTML (\fromhtml).
 *
 * @param string $tag  Tag name
 * @param string $attr Attribute string placed after the tag name, if any
 *
 * @return void
 */
protected function openTag($tag, $attr = '')
{
    if ($this->fromhtml) {
        return;
    }

    $markup = "<{$tag}";
    if ($attr) {
        $markup .= " {$attr}";
    }

    $this->output .= $markup . ">";
    $this->openedTags[$tag] = true;
}
/**
 * Closes a tag if it is open. Does nothing in encapsulated HTML (\fromhtml).
 *
 * When the output ends with the bare opening tag, the element would be
 * empty: an empty 'p' is replaced by a line break and any other empty
 * element is removed. Otherwise the closing tag is written.
 *
 * @param string $tag Tag name
 *
 * @return void
 */
protected function closeTag($tag)
{
    if ($this->fromhtml || !$this->openedTags[$tag]) {
        return;
    }

    $opening = "<{$tag}>";
    $width = strlen($opening);

    if (substr($this->output, -$width) != $opening) {
        // The element has content: close it normally.
        $this->output .= "</{$tag}>";
    } elseif ($tag == 'p') {
        // Replace empty 'p' element with a line break
        $this->output = substr($this->output, 0, -3) . "<br>";
    } else {
        // Delete empty elements
        $this->output = substr($this->output, 0, -$width);
    }

    $this->openedTags[$tag] = false;
}
/**
 * Closes every tracked tag, in the order the tags are listed in
 * $this->openedTags.
 *
 * @return void
 */
protected function closeTags()
{
    $tags = array_keys($this->openedTags);
    foreach ($tags as $tag) {
        $this->closeTag($tag);
    }
}
/**
 * Writes the character a control symbol stands for. Symbols that are not
 * listed are ignored.
 *
 * @param \RtfHtmlPhp\ControlSymbol $symbol The control symbol
 *
 * @return void
 */
protected function formatControlSymbol($symbol)
{
    $char = $symbol->symbol;

    if ($char == '\'') {
        // \'hh: a character code in the current source encoding
        $enc = $this->getSourceEncoding();
        $uchar = $this->decodeUnicode($symbol->parameter, $enc);
        $this->write($uchar);
    } elseif ($char == '~') {
        $this->write("&nbsp;"); // Non breaking space
    } elseif ($char == '-') {
        $this->write("&#173;"); // Optional hyphen
    } elseif ($char == '_') {
        $this->write("&#8209;"); // Non breaking hyphen
    } elseif (in_array($char, ['{', '}', '\\'], true)) {
        // Escaped brace or backslash: the literal character itself
        $this->write($char);
    }
}
/**
 * Writes a text element: HTML special characters are escaped (quotes are
 * left alone) and, unless the output encoding is 'HTML-ENTITIES', the
 * result is converted from UTF-8 to the output encoding.
 *
 * @param \RtfHtmlPhp\Text $text The text element
 *
 * @return void
 */
protected function formatText($text)
{
    $escaped = htmlspecialchars($text->text, ENT_NOQUOTES, 'UTF-8');

    if ($this->encoding != 'HTML-ENTITIES') {
        $escaped = mb_convert_encoding($escaped, $this->encoding, 'UTF-8');
    }

    $this->write($escaped);
}
/**
 * Returns the encoding that \'hh characters are currently written in.
 *
 * The code page of the current font wins, then the font's charset, then
 * the document-wide encoding. When none of them is known, CP1252 is used,
 * ANSI being the default character set of RTF.
 *
 * @return string Encoding name (for iconv)
 */
protected function getSourceEncoding()
{
    if (isset($this->state->font) && isset(State::$fonttbl[$this->state->font])) {
        $font = State::$fonttbl[$this->state->font];
        if (isset($font->codepage)) {
            return $font->codepage;
        }
        if (isset($font->charset)) {
            return $font->charset;
        }
    }

    // The document may not have declared a character set at all
    return isset($this->rtfEncoding) ? $this->rtfEncoding : 'CP1252';
}
/**
 * Translates an RTF charset identifier into an encoding name (for iconv).
 *
 * @param int $charset Charset identifier
 *
 * @return string|null Encoding name, or NULL for an unknown or invalid charset
 */
protected function getEncodingFromCharset($charset)
{
    // Windows character sets and their iconv encoding names.
    // Entries marked with * are approximations.
    $encodings = [
        0   => 'CP1252', // ANSI: Western Europe
        1   => 'CP1252', //*Default
        2   => 'CP1252', //*Symbol
        3   => null,     // Invalid
        77  => 'MAC',    //*also [MacRoman]: Macintosh
        128 => 'CP932',  //*or [Shift_JIS]?: Japanese
        129 => 'CP949',  //*also [UHC]: Korean (Hangul)
        130 => 'CP1361', //*also [JOHAB]: Korean (Johab)
        134 => 'CP936',  //*or [GB2312]?: Simplified Chinese
        136 => 'CP950',  //*or [BIG5]?: Traditional Chinese
        161 => 'CP1253', // Greek
        162 => 'CP1254', // Turkish (latin 5)
        163 => 'CP1258', // Vietnamese
        177 => 'CP1255', // Hebrew
        178 => 'CP1256', // Simplified Arabic
        179 => 'CP1256', //*Traditional Arabic
        180 => 'CP1256', //*Arabic User
        181 => 'CP1255', //*Hebrew User
        186 => 'CP1257', // Baltic
        204 => 'CP1251', // Russian (Cyrillic)
        222 => 'CP874',  // Thai
        238 => 'CP1250', // Eastern European (latin 2)
        254 => 'CP437',  //*also [IBM437][437]: PC437
        255 => 'CP437',  //*OEM still PC437
    ];

    return isset($encodings[$charset]) ? $encodings[$charset] : null;
}
/**
 * Translates an RTF code page into an encoding name (for iconv).
 *
 * @param int|string $cpg Code page number, or one of the character set
 *                        keywords 'ansi', 'mac', 'pc', 'pca'
 *
 * @return string|null Encoding name or NULL on unknown CodePage
 */
protected function getEncodingFromCodepage($cpg)
{
    // Code pages 709, 710, 711 and 720 (Arabic variants) are left out
    // because iconv does not support them.
    $encodings = [
        'ansi' => 'CP1252',
        'mac'  => 'MAC',
        'pc'   => 'CP437',
        'pca'  => 'CP850',
        437    => 'CP437',    // United States IBM
        708    => 'ASMO-708', // also [ISO-8859-6][ARABIC] Arabic
        819    => 'CP819',    // Windows 3.1 (US and Western Europe)
        850    => 'CP850',    // IBM multilingual
        852    => 'CP852',    // Eastern European
        860    => 'CP860',    // Portuguese
        862    => 'CP862',    // Hebrew
        863    => 'CP863',    // French Canadian
        864    => 'CP864',    // Arabic
        865    => 'CP865',    // Norwegian
        866    => 'CP866',    // Soviet Union
        874    => 'CP874',    // Thai
        932    => 'CP932',    // Japanese
        936    => 'CP936',    // Simplified Chinese
        949    => 'CP949',    // Korean
        950    => 'CP950',    // Traditional Chinese
        1250   => 'CP1250',   // Windows 3.1 (Eastern European)
        1251   => 'CP1251',   // Windows 3.1 (Cyrillic)
        1252   => 'CP1252',   // Western European
        1253   => 'CP1253',   // Greek
        1254   => 'CP1254',   // Turkish
        1255   => 'CP1255',   // Hebrew
        1256   => 'CP1256',   // Arabic
        1257   => 'CP1257',   // Baltic
        1258   => 'CP1258',   // Vietnamese
        1361   => 'CP1361',   // Johab
    ];

    return isset($encodings[$cpg]) ? $encodings[$cpg] : null;
}
/**
 * Returns the code point of the first character of a UTF-8 string.
 *
 * The length of the sequence is taken from its lead byte; the historical
 * 5- and 6-byte forms are still accepted. Bytes beyond the end of the
 * string are never read.
 *
 * @param string $chr UTF-8 encoded character
 *
 * @return int|null Code point, or NULL when the lead byte is not a valid
 *                  start of a sequence or the sequence is truncated
 */
protected function ordUtf8($chr)
{
    $lead = ord($chr);

    // Single byte (ASCII)
    if ($lead <= 127) {
        return $lead;
    }

    // Number of continuation bytes and payload of the lead byte
    if ($lead >= 192 && $lead <= 223) {
        $extra = 1;
        $code = $lead - 192;
    } elseif ($lead >= 224 && $lead <= 239) {
        $extra = 2;
        $code = $lead - 224;
    } elseif ($lead >= 240 && $lead <= 247) {
        $extra = 3;
        $code = $lead - 240;
    } elseif ($lead >= 248 && $lead <= 251) {
        $extra = 4;
        $code = $lead - 248;
    } elseif ($lead >= 252 && $lead <= 253) {
        $extra = 5;
        $code = $lead - 252;
    } else {
        // Continuation byte or 0xFE/0xFF in lead position
        return null;
    }

    // Truncated sequence
    if (strlen($chr) <= $extra) {
        return null;
    }

    // Every continuation byte contributes its low six bits
    for ($i = 1; $i <= $extra; $i++) {
        $code = $code * 64 + (ord($chr[$i]) - 128);
    }

    return $code;
}
}
@@ -0,0 +1,56 @@
<?php
namespace RtfHtmlPhp\Html;
class Image
{
    /** @var string Picture format used in the data URI ('bmp' unless changed) */
    public $format;
    /** @var int Width (in xExt if wmetafile otherwise in px) */
    public $width;
    /** @var int Height (in yExt if wmetafile otherwise in px) */
    public $height;
    /** @var int Desired width in twips */
    public $goalWidth;
    /** @var int Desired height in twips */
    public $goalHeight;
    /** @var int Horizontal scaling in percent */
    public $pcScaleX;
    /** @var int Vertical scaling in percent */
    public $pcScaleY;
    /** @var int|null Number of bytes of the binary data */
    public $binarySize;
    /** @var string|null Binary or hexadecimal data */
    public $imageData;

    /**
     * Object constructor.
     */
    public function __construct()
    {
        $this->reset();
    }

    /**
     * Resets the object to the initial state
     *
     * @return void
     */
    public function reset()
    {
        $this->format = 'bmp';
        $this->width = 0;
        $this->height = 0;
        $this->goalWidth = 0;
        $this->goalHeight = 0;
        $this->pcScaleX = 100; // 100%
        $this->pcScaleY = 100; // 100%
        $this->binarySize = null;
        $this->imageData = null;
    }

    /**
     * Generate a HTML content for the image
     *
     * The hexadecimal image data is embedded as a base64 data URI.
     * Whitespace inside the hexadecimal data is ignored.
     *
     * @return string <img> tag content, An empty string for unsupported/empty image
     */
    public function printImage()
    {
        // Binary data (a binary size was announced) is not implemented
        if (isset($this->binarySize)) {
            return '';
        }

        if (empty($this->imageData)) {
            return '';
        }

        // Hex digits may be separated by blanks or line breaks, which
        // pack('H*') would not accept as digits.
        $hex = str_replace([" ", "\t", "\r", "\n"], '', $this->imageData);
        if ($hex === '') {
            return '';
        }

        // process hexadecimal data
        $data = base64_encode(pack('H*', $hex));

        // <img src="data:image/{FORMAT};base64,{#BDATA}" />
        return "<img src=\"data:image/{$this->format};base64,{$data}\" />";
    }
}
@@ -0,0 +1,163 @@
<?php
namespace RtfHtmlPhp\Html;
class State
{
    /** @var Font[] Font table, indexed by font number */
    public static $fonttbl = [];
    /** @var array Color table; entries are '#rrggbb' strings, or 0 for the 'auto' color */
    public static $colortbl = [];
    /** @var string[] Color names for the \highlight control word, by index */
    protected static $highlight = [
        1 => 'Black',
        2 => 'Blue',
        3 => 'Cyan',
        4 => 'Green',
        5 => 'Magenta',
        6 => 'Red',
        7 => 'Yellow',
        8 => 'Unused',
        9 => 'DarkBlue',
        10 => 'DarkCyan',
        11 => 'DarkGreen',
        12 => 'DarkMagenta',
        13 => 'DarkRed',
        14 => 'DarkYellow',
        15 => 'DarkGray',
        16 => 'LightGray'
    ];

    public $bold;
    public $italic;
    public $underline;
    public $strike;
    public $hidden;
    /** @var int|float Font size in px; 0 means "not set" */
    public $fontsize;
    /** @var int|null Index into the color table */
    public $fontcolor;
    /** @var int|null Index into the color table */
    public $background;
    /** @var int|null Index into the highlight color list */
    public $hcolor;
    /** @var int|null Index into the font table */
    public $font;
    /** @var bool Whether output is currently suppressed (\htmlrtf region) */
    public $htmlrtf;

    /**
     * Object constructor
     */
    public function __construct()
    {
        $this->reset();
    }

    /**
     * Store a font in the font table at the specified index.
     *
     * @param int  $index Font number
     * @param Font $font  Font object
     *
     * @return void
     */
    public static function setFont($index, Font $font)
    {
        self::$fonttbl[$index] = $font;
    }

    /**
     * Resets the object to the initial state
     *
     * @param int|null $defaultFont Number of the font to fall back to
     *
     * @return void
     */
    public function reset($defaultFont = null)
    {
        $this->bold = false;
        $this->italic = false;
        $this->underline = false;
        $this->strike = false;
        $this->hidden = false;
        $this->fontsize = 0;
        $this->fontcolor = null;
        $this->background = null;
        $this->hcolor = null;
        $this->font = $defaultFont;
        $this->htmlrtf = false;
    }

    /**
     * Generates css style for the state.
     *
     * Underline and strike-through are emitted as one text-decoration
     * declaration so that neither overrides the other. A font number or
     * color index that is missing from its table is skipped.
     *
     * @return string The css string; every declaration ends with ';'.
     *                An empty string when nothing is set
     */
    public function printStyle()
    {
        $style = [];

        if ($this->bold) {
            $style[] = "font-weight:bold";
        }

        if ($this->italic) {
            $style[] = "font-style:italic";
        }

        $decorations = [];
        if ($this->underline) {
            $decorations[] = "underline";
        }
        if ($this->strike) {
            $decorations[] = "line-through";
        }
        if ($decorations) {
            $style[] = "text-decoration:" . implode(' ', $decorations);
        }

        if ($this->hidden) {
            $style[] = "display:none";
        }

        // The font may be missing from the table (no font table, or a
        // default font number that was never defined).
        if (isset($this->font) && isset(self::$fonttbl[$this->font])) {
            $fontStyle = self::$fonttbl[$this->font]->toStyle();
            if ($fontStyle !== '') {
                $style[] = $fontStyle;
            }
        }

        if ($this->fontsize != 0) {
            $style[] = "font-size:{$this->fontsize}px";
        }

        // Font color:
        if (isset($this->fontcolor)) {
            // Check if color is set. in particular when it's the 'auto' color
            if (array_key_exists($this->fontcolor, self::$colortbl) && self::$colortbl[$this->fontcolor]) {
                $style[] = "color:" . self::$colortbl[$this->fontcolor];
            }
        }

        // Background color:
        if (isset($this->background)) {
            // Check if color is set. in particular when it's the 'auto' color
            if (array_key_exists($this->background, self::$colortbl) && self::$colortbl[$this->background]) {
                $style[] = "background-color:" . self::$colortbl[$this->background];
            }
        } elseif (isset($this->hcolor)) {
            // Highlight color:
            if (array_key_exists($this->hcolor, self::$highlight) && self::$highlight[$this->hcolor]) {
                $style[] = "background-color:" . self::$highlight[$this->hcolor];
            }
        }

        return empty($style) ? '' : implode(';', $style) . ';';
    }

    /**
     * Check whether this State is equal to another State.
     * The htmlrtf flag is not part of the comparison.
     *
     * @param State $state A state to compare with
     *
     * @return bool True if the state is identical, False otherwise
     */
    public function equals($state)
    {
        if (!($state instanceof State)) {
            return false;
        }

        return $this->bold == $state->bold
            && $this->italic == $state->italic
            && $this->underline == $state->underline
            && $this->strike == $state->strike
            && $this->hidden == $state->hidden
            && $this->fontsize == $state->fontsize
            // Compare colors
            && $this->fontcolor == $state->fontcolor
            && $this->background == $state->background
            && $this->hcolor == $state->hcolor
            // Compare fonts
            && $this->font == $state->font;
    }
}
+30
View File
@@ -0,0 +1,30 @@
<?php
namespace RtfHtmlPhp;
class Text extends Element
{
    public $text;

    /**
     * Wraps a piece of plain text as an RTF element.
     *
     * @param string $text The content
     */
    public function __construct($text)
    {
        $this->text = $text;
    }

    /**
     * Renders this text element as a single line of the debug dump.
     *
     * @param int $level Nesting depth; the line is prefixed with that many spaces
     *
     * @return string
     */
    public function toString($level)
    {
        $indent = str_repeat(" ", $level);

        return $indent . "TEXT " . $this->text . "\n";
    }
}