diff --git a/src/Stringy.php b/src/Stringy.php index ccb6f5a..163bb87 100644 --- a/src/Stringy.php +++ b/src/Stringy.php @@ -1396,28 +1396,124 @@ public function tidy() } /** - * Returns a trimmed string with the first letter of each word capitalized. + * Returns a trimmed string in proper title case. + * * Also accepts an array, $ignore, allowing you to list words not to be * capitalized. * + * Adapted from John Gruber's script. + * + * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78 + * * @param array $ignore An array of words not to capitalize + * * @return static Object with a titleized $str */ - public function titleize($ignore = null) + public function titleize($ignore = []) { + $smallWords = array_merge( + ['(?trim(), $this->encoding); - $encoding = $this->encoding; + if (preg_match('/[[:lower:]]/', $stringy) === 0) { + $stringy = $stringy->toLowerCase(); + } + + // The main substitutions $stringy->str = preg_replace_callback( - '/([\S]+)/u', - function ($match) use ($encoding, $ignore) { - if ($ignore && in_array($match[0], $ignore)) { - return $match[0]; + '~\b (_*) (?: # 1. Leading underscore and + ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ | # 2. file path or + [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) # URL, domain, or email + | + ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' ) # 3. or small word (case-insensitive) + | + ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' ) # 4. or word w/o internal caps + | + ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' ) # 5. or some other word + ) (_*) \b # 6. With trailing underscore + ~ux', + function ($matches) { + // Preserve leading underscore + $str = $matches[1]; + + if ($matches[2]) { + // Preserve URLs, domains, emails and file paths + $str .= $matches[2]; + } elseif ($matches[3]) { + // Lower-case small words + $str .= static::create($matches[3], $this->encoding)->toLowerCase(); + } elseif ($matches[4]) { + // Capitalize word w/o internal caps + $str .= static::create($matches[4], $this->encoding)->upperCaseFirst(); + } else { + // Preserve other kinds of word (iPhone) + $str .= $matches[5]; } - $stringy = new Stringy($match[0], $encoding); + // Preserve trailing underscore + $str .= $matches[6]; + + return $str; + }, + $stringy->str + ); + + // Exceptions for small words: capitalize at start of title... + $stringy->str = preg_replace_callback( + '~( \A [[:punct:]]* # start of title... + | [:.;?!][ ]+ # or of subsentence... + | [ ][\'"“‘(\[][ ]* ) # or of inserted subphrase... + ( ' . $smallWordsRx . ' ) \b # ...followed by small word + ~uxi', + function ($matches) { + return $matches[1] . static::create($matches[2], $this->encoding)->upperCaseFirst(); + }, + $stringy->str + ); - return (string) $stringy->toLowerCase()->upperCaseFirst(); + // ...and end of title + $stringy->str = preg_replace_callback( + '~\b ( ' . $smallWordsRx . ' ) # small word... + (?= [[:punct:]]* \Z # ...at the end of the title... + | [\'"’”)\]] [ ] ) # ...or of an inserted subphrase? + ~uxi', + function ($matches) { + return static::create($matches[1], $this->encoding)->upperCaseFirst(); + }, + $stringy->str + ); + + // Exceptions for small words in hyphenated compound words + // e.g. "in-flight" -> In-Flight + $stringy->str = preg_replace_callback( + '~\b + (?encoding)->upperCaseFirst(); + }, + $stringy->str + ); + + // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point) + $stringy->str = preg_replace_callback( + '~\b + (?encoding)->upperCaseFirst(); }, $stringy->str ); @@ -1537,15 +1633,13 @@ public function toTabs($tabLength = 4) } /** - * Converts the first character of each word in the string to uppercase. + * Returns a trimmed string in proper title case. * * @return static Object with all characters of $str being title-cased */ public function toTitleCase() { - $str = \mb_convert_case($this->str, \MB_CASE_TITLE, $this->encoding); - - return static::create($str, $this->encoding); + return $this->titleize(); } /** diff --git a/tests/StringyTest.php b/tests/StringyTest.php index 97d5502..24801e7 100644 --- a/tests/StringyTest.php +++ b/tests/StringyTest.php @@ -553,8 +553,7 @@ public function swapCaseProvider() /** * @dataProvider titleizeProvider() */ - public function testTitleize($expected, $str, $ignore = null, - $encoding = null) + public function testTitleize($str, $expected, $ignore = [], $encoding = null) { $stringy = S::create($str, $encoding); $result = $stringy->titleize($ignore); @@ -565,15 +564,42 @@ public function testTitleize($expected, $str, $ignore = null, public function titleizeProvider() { - $ignore = ['at', 'by', 'for', 'in', 'of', 'on', 'out', 'to', 'the']; - return [ - ['Title Case', 'TITLE CASE'], - ['Testing The Method', 'testing the method'], - ['Testing the Method', 'testing the method', $ignore], - ['I Like to Watch Dvds at Home', 'i like to watch DVDs at home', - $ignore], - ['Θα Ήθελα Να Φύγει', ' Θα ήθελα να φύγει ', null, 'UTF-8'] + ['TITLE CASE', 'Title Case'], + ['testing the method', 'Testing the Method'], + ['i like to watch DVDs at home', 'I Like to watch DVDs at Home', ['watch']], + [' Θα ήθελα να φύγει ', 'Θα Ήθελα Να Φύγει', [], 'UTF-8'], + ['For step-by-step directions email someone@gmail.com', 'For Step-by-Step Directions Email someone@gmail.com'], + ["2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'", "2lmc Spool: 'Gruber on OmniFocus and Vapo(u)rware'"], + ['Have you read “The Lottery”?', 'Have You Read “The Lottery”?'], + ['your hair[cut] looks (nice)', 'Your Hair[cut] Looks (Nice)'], + ["People probably won't put http://foo.com/bar/ in titles", "People Probably Won't Put http://foo.com/bar/ in Titles"], + ['Scott Moritz and TheStreet.com’s million iPhone la‑la land', 'Scott Moritz and TheStreet.com’s Million iPhone La‑La Land'], + ['BlackBerry vs. iPhone', 'BlackBerry vs. iPhone'], + ['Notes and observations regarding Apple’s announcements from ‘The Beat Goes On’ special event', 'Notes and Observations Regarding Apple’s Announcements From ‘The Beat Goes On’ Special Event'], + ['Read markdown_rules.txt to find out how _underscores around words_ will be interpretted', 'Read markdown_rules.txt to Find Out How _Underscores Around Words_ Will Be Interpretted'], + ["Q&A with Steve Jobs: 'That's what happens in technology'", "Q&A With Steve Jobs: 'That's What Happens in Technology'"], + ["What is AT&T's problem?", "What Is AT&T's Problem?"], + ['Apple deal with AT&T falls through', 'Apple Deal With AT&T Falls Through'], + ['this v that', 'This v That'], + ['this vs that', 'This vs That'], + ['this v. that', 'This v. That'], + ['this vs. that', 'This vs. That'], + ["The SEC's Apple probe: what you need to know", "The SEC's Apple Probe: What You Need to Know"], + ["'by the way, small word at the start but within quotes.'", "'By the Way, Small Word at the Start but Within Quotes.'"], + ['Small word at end is nothing to be afraid of', 'Small Word at End Is Nothing to Be Afraid Of'], + ['Starting sub-phrase with a small word: a trick, perhaps?', 'Starting Sub-Phrase With a Small Word: A Trick, Perhaps?'], + ["Sub-phrase with a small word in quotes: 'a trick, perhaps?'", "Sub-Phrase With a Small Word in Quotes: 'A Trick, Perhaps?'"], + ['Sub-phrase with a small word in quotes: "a trick, perhaps?"', 'Sub-Phrase With a Small Word in Quotes: "A Trick, Perhaps?"'], + ['"Nothing to Be Afraid of?"', '"Nothing to Be Afraid Of?"'], + ['a thing', 'A Thing'], + ['Dr. Strangelove (or: how I Learned to Stop Worrying and Love the Bomb)', 'Dr. Strangelove (Or: How I Learned to Stop Worrying and Love the Bomb)'], + [' this is trimming', 'This Is Trimming'], + ['this is trimming ', 'This Is Trimming'], + [' this is trimming ', 'This Is Trimming'], + ['IF IT’S ALL CAPS, FIX IT', 'If It’s All Caps, Fix It'], + ['What could/should be done about slashes?', 'What Could/Should Be Done About Slashes?'], + ['Never touch paths like /var/run before/after /boot', 'Never Touch Paths Like /var/run Before/After /boot'], ]; }