استفاده و توسعه بدون مرز با مجوز MIT
با حجم کمتر از 40 کیلوبایت
پردازش سریع در کمتر از 0.02 میلیثانیه
استفاده آسان با تنها 2 خط کد!
composer require alirezasedghi/virastar
// Require Composer's autoloader.
require 'vendor/autoload.php';
// Using Virastar namespace.
use Alirezasedghi\Virastar\Virastar;
$virastar = new Virastar();
$text = "فارسي را كمی درست تر می نويسيم";
$cleaned = $virastar->cleanup($text);
echo $cleaned; // Outputs: "فارسی را کمی درستتر مینویسیم"
نوع: آرایه
$virastar = new Virastar([
"fix_english_numbers": false,
"cleanup_line_breaks": false
]);
normalize_eol
پیشفرض: true
جایگزینی انتهای خطوط ویندوز با eol یونیکس (\n)
decode_html_entities
پیشفرض: true
converts numeral and selected html character-sets into original characters
fix_dashes
پیشفرض: true
replaces triple dash to mdash
replaces double dash to ndash
fix_three_dots
پیشفرض: true
removes spaces between dots
replaces three dots with ellipsis character
normalize_ellipsis
پیشفرض: true
replaces more than one ellipsis with one
replaces (space|tab|zwnj) after ellipsis with one space
normalize_dates
پیشفرض: true
re-orders date parts with slash as delimiter
fix_english_quotes_pairs
پیشفرض: true
replaces english quote pairs (“”) with their persian equivalent («»)
fix_english_quotes
پیشفرض: true
replaces english quote marks with their persian equivalent
fix_hamzeh
پیشفرض: true
replaces ه followed by (space|ZWNJ|lrm) follow by ی with هٔ
replaces ه followed by (space|ZWNJ|lrm|nothing) follow by ء with هٔ
replaces هٓ or single-character ۀ with the standard هٔ
fix_hamzeh_arabic
پیشفرض: false
converts arabic hamzeh ة to هٔ
cleanup_rlm
پیشفرض: true
converts Right-to-left marks followed by persian characters to zero-width non-joiners (ZWNJ)
cleanup_zwnj
پیشفرض: true
converts all soft hyphens () into zwnj
removes more than one zwnj
cleans zwnj after characters that don't connect to the next
cleans zwnj before and after numbers, english words, spaces and punctuations
removes unnecessary zwnj on start/end of each line
fix_arabic_numbers
پیشفرض: true
replaces arabic numbers with their persian equivalent
fix_english_numbers
پیشفرض: true
replaces english numbers with their persian equivalent
fix_numeral_symbols
پیشفرض: true
replaces english percent signs (U+066A)
replaces dots between numbers into decimal separator (U+066B)
replaces commas between numbers into thousands separator (U+066C)
fix_misc_non_persian_chars
پیشفرض: true
replaces arabic normal/swash kaf with its persian equivalent
replaces arabic/urdu/pushtu/uyghur yeh with its persian equivalent
replaces kurdish he with its persian equivalent
fix_punctuations
پیشفرض: true
replaces ,, ; with its persian equivalent
fix_question_mark
پیشفرض: true
replaces question marks with its persian equivalent
fix_prefix_spacing
پیشفرض: true
puts zwnj between the word and the prefix:
mi*, nemi*, bi*
fix_suffix_spacing
پیشفرض: true
puts zwnj between the word and the suffix:
*ha, *haye
*am, *at, *ash, *ei, *eid, *eem, *and, *man, *tan, *shan
*tar, *tari, *tarin
*hayee, *hayam, *hayat, *hayash, *hayetan, *hayeman, *hayeshan
fix_suffix_misc
پیشفرض: true
replaces ه followed by ئ or ی, and then by ی, with های
fix_spacing_for_braces_and_quotes
پیشفرض: true
removes inside spaces and more than one outside for (), [], {}, “” and «»
fix_spacing_for_punctuations
پیشفرض: true
removes space before punctuations
removes more than one space after punctuations, except followed by new-lines
removes space after colon that separates time parts
removes space after dots in numbers
removes space before some common domain tlds
removes space between question and exclamation marks
removes space between same marks
fix_diacritics
پیشفرض: true
cleans zwnj before diacritic characters
cleans more than one diacritic characters
clean spaces before diacritic characters
remove_diacritics
پیشفرض: false
removes all diacritic characters
fix_persian_glyphs
پیشفرض: true
converts incorrect persian glyphs to standard characters
fix_misc_spacing
پیشفرض: true
removes space before parentheses on misc cases
removes space before braces containing numbers
cleanup_spacing
پیشفرض: true
replaces more than one space with just a single one
cleans whitespace/zwnj between new-lines
cleanup_line_breaks
پیشفرض: true
cleans more than two contiguous line breaks
cleanup_begin_and_end
پیشفرض: true
removes space/tab/zwnj/nbsp from the beginning of the new-lines
remove spaces, tabs, zwnj, direction marks and new lines from the beginning and end of text
markdown_normalize_braces
پیشفرض: true
remove spaces between [] and () ([text] (link) into [text](link))
removes space between ! and opening brace (! [alt](src) into ![alt](src))
remove spaces inside double (), [], {} ([[ text ]] into [[text]])
remove spaces between double (), [], {} ([[text] ] into [[text]])
markdown_normalize_lists
پیشفرض: true
removes extra lines between two items on a markdown list beginning with -, * or #
skip_markdown_ordered_lists_numbers_conversion
پیشفرض: false
skips converting english numbers of ordered lists in markdown
cleanup_extra_marks
پیشفرض: true
replaces more than one exclamation mark with just one
replaces more than one english or persian question mark with just one
re-orders consecutive marks: ?! into !?
kashidas_as_parenthetic
پیشفرض: true
replaces kashidas to ndash in parenthetic
cleanup_kashidas
پیشفرض: true
converts kashida between numbers to ndash
removes all kashidas between non-whitespace characters
preserve_front_matter
پیشفرض: true
preserves front matter data in the text
preserve_HTML
پیشفرض: true
preserves all html tags in the text
preserve_comments
پیشفرض: true
preserves all html comments in the text
preserve_entities
پیشفرض: true
preserves all html entities in the text
preserve_URIs
پیشفرض: true
preserves all uri strings in the text
preserve_brackets
پیشفرض: false
preserves strings inside square brackets ([])
preserve_braces
پیشفرض: false
preserves strings inside curly braces ({})
preserve_nbsp
پیشفرض: true
preserves all no-break space entities in the text