Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 107 additions & 13 deletions src/Stringy.php
Original file line number Diff line number Diff line change
Expand Up @@ -1396,28 +1396,124 @@ public function tidy()
}

/**
* Returns a trimmed string with the first letter of each word capitalized.
* Returns a trimmed string in proper title case.
*
* Also accepts an array, $ignore, allowing you to list words not to be
* capitalized.
*
* Adapted from John Gruber's script.
*
* @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
*
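* @param array $ignore Extra words to treat as small words (kept lower-case
*                      mid-title); each entry is joined into the small-word
*                      regex unescaped, so regex metacharacters are interpreted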
*
* @return static Object with a titleized $str
*/
public function titleize($ignore = null)
public function titleize($ignore = [])
{
$smallWords = array_merge(
['(?<!q&)a', 'an', 'and', 'as', 'at(?!&t)', 'but', 'by', 'en', 'for', 'if', 'in', 'of', 'on', 'or', 'the', 'to', 'v[.]?', 'via', 'vs[.]?'],
(array)$ignore
);

$smallWordsRx = implode('|', $smallWords);

$apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

$stringy = static::create($this->trim(), $this->encoding);
$encoding = $this->encoding;

if (preg_match('/[[:lower:]]/', $stringy) === 0) {
$stringy = $stringy->toLowerCase();
}

// The main substitutions
$stringy->str = preg_replace_callback(
'/([\S]+)/u',
function ($match) use ($encoding, $ignore) {
if ($ignore && in_array($match[0], $ignore)) {
return $match[0];
'~\b (_*) (?: # 1. Leading underscore and
( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ | # 2. file path or
[-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) # URL, domain, or email
|
( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' ) # 3. or small word (case-insensitive)
|
( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' ) # 4. or word w/o internal caps
|
( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' ) # 5. or some other word
) (_*) \b # 6. With trailing underscore
~ux',
function ($matches) {
// Preserve leading underscore
$str = $matches[1];

if ($matches[2]) {
// Preserve URLs, domains, emails and file paths
$str .= $matches[2];
} elseif ($matches[3]) {
// Lower-case small words
$str .= static::create($matches[3], $this->encoding)->toLowerCase();
} elseif ($matches[4]) {
// Capitalize word w/o internal caps
$str .= static::create($matches[4], $this->encoding)->upperCaseFirst();
} else {
// Preserve other kinds of word (iPhone)
$str .= $matches[5];
}

$stringy = new Stringy($match[0], $encoding);
// Preserve trailing underscore
$str .= $matches[6];

return $str;
},
$stringy->str
);

// Exceptions for small words: capitalize at start of title...
$stringy->str = preg_replace_callback(
'~( \A [[:punct:]]* # start of title...
| [:.;?!][ ]+ # or of subsentence...
| [ ][\'"“‘(\[][ ]* ) # or of inserted subphrase...
( ' . $smallWordsRx . ' ) \b # ...followed by small word
~uxi',
function ($matches) {
return $matches[1] . static::create($matches[2], $this->encoding)->upperCaseFirst();
},
$stringy->str
);

return (string) $stringy->toLowerCase()->upperCaseFirst();
// ...and end of title
$stringy->str = preg_replace_callback(
'~\b ( ' . $smallWordsRx . ' ) # small word...
(?= [[:punct:]]* \Z # ...at the end of the title...
| [\'"’”)\]] [ ] ) # ...or of an inserted subphrase?
~uxi',
function ($matches) {
return static::create($matches[1], $this->encoding)->upperCaseFirst();
},
$stringy->str
);

// Exceptions for small words in hyphenated compound words
// e.g. "in-flight" -> In-Flight
$stringy->str = preg_replace_callback(
'~\b
(?<! -) # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
( ' . $smallWordsRx . ' )
(?= -[[:alpha:]]+) # lookahead for "-someword"
~uxi',
function ($matches) {
return static::create($matches[1], $this->encoding)->upperCaseFirst();
},
$stringy->str
);

// e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
$stringy->str = preg_replace_callback(
'~\b
(?<!…) # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
( [[:alpha:]]+- ) # $1 = first word and hyphen, should already be properly capped
( ' . $smallWordsRx . ' ) # ...followed by small word
(?! - ) # Negative lookahead for another -
~uxi',
function ($matches) {
return $matches[1] . static::create($matches[2], $this->encoding)->upperCaseFirst();
},
$stringy->str
);
Expand Down Expand Up @@ -1537,15 +1633,13 @@ public function toTabs($tabLength = 4)
}

/**
* Converts the first character of each word in the string to uppercase.
* Returns a trimmed string in proper title case.
*
* @return static Object with all characters of $str being title-cased
*/
public function toTitleCase()
{
// Upper-case the first character of each word via mbstring, using the
// instance's encoding.
$str = \mb_convert_case($this->str, \MB_CASE_TITLE, $this->encoding);

return static::create($str, $this->encoding);
// NOTE(review): unreachable as rendered, because the return above always
// runs first. These look like the removed and added sides of a diff hunk
// shown without +/- markers — confirm that delegating to titleize() is the
// intended final body. titleize() lower-cases small words such as
// "and"/"of", so delegating would change results for existing callers.
return $this->titleize();
}

/**
Expand Down
46 changes: 36 additions & 10 deletions tests/StringyTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,7 @@ public function swapCaseProvider()
/**
* @dataProvider titleizeProvider()
*/
public function testTitleize($expected, $str, $ignore = null,
$encoding = null)
public function testTitleize($str, $expected, $ignore = [], $encoding = null)
{
$stringy = S::create($str, $encoding);
$result = $stringy->titleize($ignore);
Expand All @@ -565,15 +564,42 @@ public function testTitleize($expected, $str, $ignore = null,

/**
 * Data sets for testTitleize().
 *
 * NOTE(review): this listing appears to interleave removed and added rows of
 * a diff without +/- markers. The rows from 'Title Case' down to the first
 * Greek row put the expected value first and use the $ignore list below; the
 * later rows put the input first, followed by the expected title and
 * optionally an ignore list and an encoding — confirm against the
 * testTitleize() signature actually in use.
 *
 * @return array
 */
public function titleizeProvider()
{
// Ignore list passed by the expected-first rows below.
$ignore = ['at', 'by', 'for', 'in', 'of', 'on', 'out', 'to', 'the'];

return [
// Expected-first rows (expected, input[, ignore[, encoding]]).
['Title Case', 'TITLE CASE'],
['Testing The Method', 'testing the method'],
['Testing the Method', 'testing the method', $ignore],
['I Like to Watch Dvds at Home', 'i like to watch DVDs at home',
$ignore],
['Θα Ήθελα Να Φύγει', ' Θα ήθελα να φύγει ', null, 'UTF-8']
// Input-first rows (input, expected[, ignore[, encoding]]).
['TITLE CASE', 'Title Case'],
['testing the method', 'Testing the Method'],
['i like to watch DVDs at home', 'I Like to watch DVDs at Home', ['watch']],
[' Θα ήθελα να φύγει ', 'Θα Ήθελα Να Φύγει', [], 'UTF-8'],
// NOTE(review): "[email protected]" looks like scraper e-mail obfuscation
// rather than the original address — restore the real address before
// relying on this row to cover e-mail preservation.
['For step-by-step directions email [email protected]', 'For Step-by-Step Directions Email [email protected]'],
["2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'", "2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"],
['Have you read “The Lottery”?', 'Have You Read “The Lottery”?'],
['your hair[cut] looks (nice)', 'Your Hair[cut] Looks (Nice)'],
["People probably won't put http://foo.com/bar/ in titles", "People Probably Won't Put http://foo.com/bar/ in Titles"],
['Scott Moritz and TheStreet.com’s million iPhone la‑la land', 'Scott Moritz and TheStreet.com’s Million iPhone La‑La Land'],
['BlackBerry vs. iPhone', 'BlackBerry vs. iPhone'],
['Notes and observations regarding Apple’s announcements from ‘The Beat Goes On’ special event', 'Notes and Observations Regarding Apple’s Announcements From ‘The Beat Goes On’ Special Event'],
['Read markdown_rules.txt to find out how _underscores around words_ will be interpretted', 'Read markdown_rules.txt to Find Out How _Underscores Around Words_ Will Be Interpretted'],
["Q&A with Steve Jobs: 'That's what happens in technology'", "Q&A With Steve Jobs: 'That's What Happens in Technology'"],
["What is AT&T's problem?", "What Is AT&T's Problem?"],
['Apple deal with AT&T falls through', 'Apple Deal With AT&T Falls Through'],
// "v" / "vs", with and without a trailing period, stay lower-case mid-title.
['this v that', 'This v That'],
['this vs that', 'This vs That'],
['this v. that', 'This v. That'],
['this vs. that', 'This vs. That'],
["The SEC's Apple probe: what you need to know", "The SEC's Apple Probe: What You Need to Know"],
// Small words are capitalized at the start or end of a title or quoted sub-phrase.
["'by the way, small word at the start but within quotes.'", "'By the Way, Small Word at the Start but Within Quotes.'"],
['Small word at end is nothing to be afraid of', 'Small Word at End Is Nothing to Be Afraid Of'],
['Starting sub-phrase with a small word: a trick, perhaps?', 'Starting Sub-Phrase With a Small Word: A Trick, Perhaps?'],
["Sub-phrase with a small word in quotes: 'a trick, perhaps?'", "Sub-Phrase With a Small Word in Quotes: 'A Trick, Perhaps?'"],
['Sub-phrase with a small word in quotes: "a trick, perhaps?"', 'Sub-Phrase With a Small Word in Quotes: "A Trick, Perhaps?"'],
['"Nothing to Be Afraid of?"', '"Nothing to Be Afraid Of?"'],
['a thing', 'A Thing'],
['Dr. Strangelove (or: how I Learned to Stop Worrying and Love the Bomb)', 'Dr. Strangelove (Or: How I Learned to Stop Worrying and Love the Bomb)'],
// Leading and trailing whitespace is trimmed from the result.
[' this is trimming', 'This Is Trimming'],
['this is trimming ', 'This Is Trimming'],
[' this is trimming ', 'This Is Trimming'],
// Input with no lower-case letters is lower-cased before title-casing.
['IF IT’S ALL CAPS, FIX IT', 'If It’s All Caps, Fix It'],
// Slash-joined words are capitalized; file paths are preserved as written.
['What could/should be done about slashes?', 'What Could/Should Be Done About Slashes?'],
['Never touch paths like /var/run before/after /boot', 'Never Touch Paths Like /var/run Before/After /boot'],
];
}

Expand Down