.
1 2 , .
/(\w+)|(\W+)/gi
, ( AS3, ruby):
/**
 * One token of the scanned text: either a run of word characters or a run
 * of non-word characters, together with where it was found.
 */
class MatchedWord
{
    /** The characters that make up this token. */
    var text:String;

    /** Offset of the token within the scanned text. */
    var charIndex:int;

    /** True when the token is a run of word characters. */
    var isWord:Boolean;

    /** Marks a token that has been recognised as a contraction; off by default. */
    var isContraction:Boolean = false;

    /**
     * @param text      the token's characters
     * @param charIndex offset of the token in the scanned text
     * @param isWord    whether the token is a word run
     */
    function MatchedWord( text:String, charIndex:int, isWord:Boolean )
    {
        this.text = text;
        this.charIndex = charIndex;
        this.isWord = isWord;
    }
}
// Split original_text into consecutive tokens. The pattern's first group
// captures a run of word characters and the second a run of non-word
// characters, so every character of the input lands in exactly one token.
var words_regex:RegExp = /(\w+)|(\W+)/gi;
var matched_words:Vector.<MatchedWord> = new Vector.<MatchedWord>();
var matched_word:MatchedWord;
var match:Object;

// Start scanning from the beginning of the text.
words_regex.lastIndex = 0;
for (match = words_regex.exec( original_text ); match != null; match = words_regex.exec( original_text ))
{
    // match[1] is only set when the word-character alternative matched.
    matched_words.push( new MatchedWord( match[0], match.index, match[1] != null ) );
}
2 2 , , , (, ) ENDS . , () , , 8 . , , 8 .
d
l
ll
m
re
s
t
ve
(-) = "'" (word) = "d", () , .
, , , - , , , "twas" "tis". () , , , , , ( , ). EQUALS , , ENDS , . , , , , ( -), , EQUALS - , .
, , - 8 , , , "g'day" "g_night". , , () . "g", .
, , .
.
Condition(Ending, PreCondition)
PreCondition -
"*", "!", or "<exact string>"
:
// Illustrative condition list in the form Condition(Ending, PreCondition).
// NOTE(review): Condition is not defined in the visible source; the final
// code expresses the same table as an array of [ending, preCondition] pairs.

// Endings listed with the "*" pre-condition.
new Condition("d","*")
new Condition("l","*");
new Condition("ll","*");
new Condition("m","*");
new Condition("re","*");
new Condition("s","*");
new Condition("t","*");
new Condition("ve","*");
// Endings listed with the "!" pre-condition.
new Condition("twas","!");
new Condition("tis","!");
// Endings listed with an exact-string pre-condition ("g").
new Condition("day","g");
new Condition("night","g");
, , 86 ( ):
'tis' twas , , , --------- , , , , , , , , , , , , , , , , , , , , , ? , ?
, , , "gotta" > "got to" "gonna" > "going to".
Here is the final AS3 code. All told, it takes fewer than 50 lines of code to split text into alternating word and non-word groups and to detect and merge contractions. Simple. Note the boolean variable "isContraction" on the MatchedWord class: the code below sets that flag whenever a contraction is detected.
// Contraction rules, one [ending, preCondition] pair per entry.
//   ending       - compared against the trimmed text of the token that
//                  follows an apostrophe token.
//   preCondition - "*": merge previous token + apostrophe + ending, provided
//                       a previous token exists and the trimmed apostrophe
//                       token is exactly "'".
//                  "!": the apostrophe is attached to the front of the ending
//                       (no previous token required).
//                  any other string: merge only when the previous token's
//                       text equals that string exactly.
var conditions:Array = [
["d","*"],
["l","*"],
["ll","*"],
["m","*"],
["re","*"],
["s","*"],
["t","*"],
["ve","*"],
["twas","!"],
["tis","!"],
["day","g"],
["night","g"]
];
// Contraction pass over matched_words.
//
// Looks for a non-word token whose trimmed text ends in an apostrophe and
// whose following token matches one of the endings in `conditions`, then
// rewrites the token list in place:
//   "*"      previous + apostrophe + next are merged into the previous token
//   "!"      the apostrophe is moved onto the front of the next token
//   <string> like "*", but only when the previous token's text is <string>
// Merged / rewritten tokens get isContraction = true.
//
// The index is advanced manually: when a merge removes the current token,
// a not-yet-examined token slides into slot i and must be inspected before
// moving on. Advancing unconditionally skipped it, so stacked contractions
// such as "wouldn't've" and a "'tis" directly after a contraction were missed.
var i:int = 0;
while (i + 1 < matched_words.length)
{
    var m:MatchedWord = matched_words[i];
    var apostrophe_text:String = StringUtils.trim( m.text );

    // True once the token at index i has been spliced out of the list.
    var current_removed:Boolean = false;

    if (!m.isWord && StringUtils.endsWith( apostrophe_text, "'" ))
    {
        var m_next:MatchedWord = matched_words[i + 1];
        var m_prev:MatchedWord = (i > 0) ? matched_words[i - 1] : null;

        for each (var condition:Array in conditions)
        {
            if (StringUtils.trim( m_next.text ) != condition[0])
                continue;

            var pre_condition:String = condition[1];

            // True once this rule has actually changed the token list.
            var handled:Boolean = false;

            switch (pre_condition)
            {
                case "*":
                    // Any preceding token will do, but the separator must be
                    // a bare apostrophe (ignoring surrounding whitespace).
                    if (m_prev != null && apostrophe_text == "'")
                    {
                        m_prev.text += m.text + m_next.text;
                        m_prev.isContraction = true;
                        matched_words.splice( i, 2 );
                        current_removed = true;
                        handled = true;
                    }
                    break;

                case "!":
                    if (apostrophe_text == "'")
                    {
                        // The separator is just the apostrophe: turn it into
                        // the contraction itself and drop the following word.
                        m.text += m_next.text;
                        m.isWord = true;
                        m.isContraction = true;
                        matched_words.splice( i + 1, 1 );
                    }
                    else
                    {
                        // The separator carries other characters before the
                        // apostrophe: keep those in place and move everything
                        // from the last apostrophe onward onto the next word.
                        var apostrophe_end:int = m.text.lastIndexOf( "'" );
                        var apostrophe_ending:String = m.text.substring( apostrophe_end, m.text.length );
                        m.text = m.text.substring( 0, m.text.length - apostrophe_ending.length );
                        m_next.text = apostrophe_ending + m_next.text;
                        m_next.charIndex = m.charIndex + apostrophe_end;
                        m_next.isContraction = true;
                    }
                    handled = true;
                    break;

                default:
                    // Exact-prefix rule: the preceding token must equal the
                    // pre-condition string.
                    if (m_prev != null && m_prev.text == pre_condition)
                    {
                        m_prev.text += m.text + m_next.text;
                        m_prev.isContraction = true;
                        matched_words.splice( i, 2 );
                        current_removed = true;
                        handled = true;
                    }
                    break;
            }

            // m / m_next no longer describe the list once a rule has fired,
            // so stop evaluating further rules against them.
            if (handled)
                break;
        }
    }

    // Only step forward when slot i still holds the token just examined.
    if (!current_removed)
        i++;
}