PhoneNumberMatcher.php 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941
  1. <?php
  2. namespace libphonenumber;
  3. use libphonenumber\Leniency\AbstractLeniency;
  4. /**
  5. * A class that finds and extracts telephone numbers from $text.
  6. * Instances can be created using PhoneNumberUtil::findNumbers()
  7. *
  8. * Vanity numbers (phone numbers using alphabetic digits such as '1-800-SIX-FLAGS') are
  9. * not found.
  10. *
  11. * @package libphonenumber
  12. */
  13. class PhoneNumberMatcher implements \Iterator
  14. {
  15. protected static $initialized = false;
  16. /**
  17. * The phone number pattern used by $this->find(), similar to
  18. * PhoneNumberUtil::VALID_PHONE_NUMBER, but with the following differences:
  19. * <ul>
  20. * <li>All captures are limited in order to place an upper bound to the text matched by the
  21. * pattern.
  22. * <ul>
  23. * <li>Leading punctuation / plus signs are limited.
  24. * <li>Consecutive occurrences of punctuation are limited.
  25. * <li>Number of digits is limited.
  26. * </ul>
  27. * <li>No whitespace is allowed at the start or end.
  28. * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
  29. * </ul>
  30. *
  31. * @var string
  32. */
  33. protected static $pattern;
  34. /**
  35. * Matches strings that look like publication pages. Example:
  36. * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
  37. * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
  38. *
  39. * The string "211-227 (2003)" is not a telephone number.
  40. *
  41. * @var string
  42. */
  43. protected static $pubPages = "\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}";
  44. /**
  45. * Matches strings that look like dates using "/" as a separator. Examples 3/10/2011, 31/10/2011 or
  46. * 08/31/95.
  47. *
  48. * @var string
  49. */
  50. protected static $slashSeparatedDates = "(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}";
  51. /**
  52. * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
  53. * trailing ":\d\d" -- that is covered by timeStampsSuffix.
  54. *
  55. * @var string
  56. */
  57. protected static $timeStamps = "[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$";
  58. protected static $timeStampsSuffix = ":[0-5]\\d";
  59. /**
  60. * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
  61. * This also checks that there is something inside the brackets. Having no brackets at all is also
  62. * fine.
  63. *
  64. * @var string
  65. */
  66. protected static $matchingBrackets;
  67. /**
  68. * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
  69. * ordered according to specificity. For example, white-space is last since that is frequently
  70. * used in numbers, not just to separate two numbers. We have separate patterns since we don't
  71. * want to break up the phone-number-like text on more than one different kind of symbol at one
  72. * time, although symbols of the same type (e.g. space) can be safely grouped together.
  73. *
  74. * Note that if there is a match, we will always check any text found up to the first match as
  75. * well.
  76. *
  77. * @var string[]
  78. */
  79. protected static $innerMatches = array();
  80. /**
  81. * Punctuation that may be at the start of a phone number - brackets and plus signs.
  82. *
  83. * @var string
  84. */
  85. protected static $leadClass;
  86. /**
  87. * Prefix of the files
  88. * @var string
  89. */
  90. protected static $alternateFormatsFilePrefix;
  /**
   * Builds the class-level regular expression fragments used by every matcher instance and
   * then flags the class as initialised. The constructor invokes this while
   * static::$initialized is still false.
   *
   * Populates $alternateFormatsFilePrefix, $innerMatches, $matchingBrackets, $leadClass and
   * $pattern.
   *
   * @return void
   * @throws \InvalidArgumentException if limit() rejects one of the quantifier bounds
   */
  protected static function init()
  {
    static::$alternateFormatsFilePrefix = __DIR__ . '/data/PhoneNumberAlternateFormats';

    static::$innerMatches = array(
      // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
      '/+(.*)',
      // Note that the bracket here is inside the capturing group, since we consider it part of the
      // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
      "(\\([^(]*)",
      // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
      // We require a space on either side of the hyphen for it to be considered a separator.
      "(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)",
      // Various types of wide hyphens (U+2012..U+2015 and the full-width hyphen-minus U+FF0D).
      // Note we have decided not to enforce a space here, since it's possible that it's supposed
      // to be used to break two numbers without spaces, and we haven't seen many instances of it
      // used within a number. The characters are written as UTF-8 byte escapes so that the plain
      // ASCII hyphen can never end up in this class: an ASCII hyphen must go through the stricter
      // space-delimited rule above.
      "[\xE2\x80\x92-\xE2\x80\x95\xEF\xBC\x8D]\\p{Z}*(.+)",
      // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
      "\\.+\\p{Z}*([^.]+)",
      // Breaks on space - e.g. "3324451234 8002341234"
      "\\p{Z}+(\\P{Z}+)"
    );

    /*
     * Builds the matchingBrackets and pattern regular expressions. The building blocks exist
     * to make the pattern more easily understood.
     */
    // ASCII "(" and "[" plus their full-width forms U+FF08 and U+FF3B.
    $openingParens = "(\\[\xEF\xBC\x88\xEF\xBC\xBB";
    // ASCII ")" and "]" plus their full-width forms U+FF09 and U+FF3D.
    $closingParens = ")\\]\xEF\xBC\x89\xEF\xBC\xBD";
    $nonParens = '[^' . $openingParens . $closingParens . ']';

    // Limit on the number of pairs of brackets in a phone number.
    $bracketPairLimit = static::limit(0, 3);

    /*
     * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's
     * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
     * closing bracket first. We limit the sets of brackets in a phone number to four.
     */
    static::$matchingBrackets =
      '(?:[' . $openingParens . '])?' . '(?:' . $nonParens . '+' . '[' . $closingParens . '])?'
      . $nonParens . '+'
      . '(?:[' . $openingParens . ']' . $nonParens . '+[' . $closingParens . '])' . $bracketPairLimit
      . $nonParens . '*';

    // Limit on the number of leading (plus) characters.
    $leadLimit = static::limit(0, 2);

    // Limit on the number of consecutive punctuation characters.
    $punctuationLimit = static::limit(0, 4);

    /*
     * The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
     * single block, set high enough to accommodate the entire national number and the international
     * country code
     */
    $digitBlockLimit = PhoneNumberUtil::MAX_LENGTH_FOR_NSN + PhoneNumberUtil::MAX_LENGTH_COUNTRY_CODE;

    /*
     * Limit on the number of blocks separated by the punctuation. Uses digitBlockLimit since some
     * formats use spaces to separate each digit
     */
    $blockLimit = static::limit(0, $digitBlockLimit);

    // A punctuation sequence allowing white space
    $punctuation = '[' . PhoneNumberUtil::VALID_PUNCTUATION . ']' . $punctuationLimit;

    // A digits block without punctuation.
    $digitSequence = "\\p{Nd}" . static::limit(1, $digitBlockLimit);

    $leadClassChars = $openingParens . PhoneNumberUtil::PLUS_CHARS;
    $leadClass = '[' . $leadClassChars . ']';
    static::$leadClass = $leadClass;

    // Init extension patterns from PhoneNumberUtil
    PhoneNumberUtil::initExtnPatterns();

    // Phone number pattern allowing optional punctuation.
    static::$pattern = '(?:' . $leadClass . $punctuation . ')' . $leadLimit
      . $digitSequence . '(?:' . $punctuation . $digitSequence . ')' . $blockLimit
      . '(?:' . PhoneNumberUtil::$EXTN_PATTERNS_FOR_MATCHING . ')?';

    static::$initialized = true;
  }
  /**
   * Produces a bounded regex quantifier of the form "{lower,upper}".
   *
   * @param int $lower Minimum repetition count; must not be negative
   * @param int $upper Maximum repetition count; must be positive and not below $lower
   * @return string The quantifier text, e.g. "{0,3}"
   * @throws \InvalidArgumentException When the bounds violate the constraints above
   */
  protected static function limit($lower, $upper)
  {
    $boundsRejected = ($lower < 0) || ($upper <= 0) || ($upper < $lower);
    if ($boundsRejected) {
      throw new \InvalidArgumentException();
    }

    return '{' . \implode(',', array($lower, $upper)) . '}';
  }
  175. /**
  176. * The phone number utility.
  177. * @var PhoneNumberUtil
  178. */
  179. protected $phoneUtil;
  180. /**
  181. * The text searched for phone numbers.
  182. * @var string
  183. */
  184. protected $text;
  185. /**
  186. * The region (country) to assume for phone numbers without an international prefix, possibly
  187. * null.
  188. * @var string
  189. */
  190. protected $preferredRegion;
  191. /**
  192. * The degrees of validation requested.
  193. * @var AbstractLeniency
  194. */
  195. protected $leniency;
  196. /**
  197. * The maximum number of retries after matching an invalid number.
  198. * @var int
  199. */
  200. protected $maxTries;
  201. /**
  202. * One of:
  203. * - NOT_READY
  204. * - READY
  205. * - DONE
  206. * @var string
  207. */
  208. protected $state = 'NOT_READY';
  209. /**
  210. * The last successful match, null unless $this->state = READY
  211. * @var PhoneNumberMatch
  212. */
  213. protected $lastMatch;
  214. /**
  215. * The next index to start searching at. Undefined when $this->state = DONE
  216. * @var int
  217. */
  218. protected $searchIndex = 0;
  /**
   * Sets up a matcher over the given text. Instances are normally obtained through the factory
   * methods on PhoneNumberUtil rather than by calling this constructor directly.
   *
   * The first construction also triggers the one-off static initialisation of the shared
   * regular expressions.
   *
   * @param PhoneNumberUtil $util The phone number utility used for parsing
   * @param string|null $text The text to scan; null is stored as an empty string
   * @param string|null $country The region assumed for numbers not written in international
   * format (with a leading plus, or with the international dialling prefix of that region).
   * May be null, or "ZZ" if only numbers with a leading plus should be considered.
   * @param AbstractLeniency $leniency How strictly candidate numbers are validated
   * @param int $maxTries How many invalid candidates may be tried before the search gives up;
   * guards against text full of false positives. Must be >= 0
   * @throws \InvalidArgumentException When $maxTries is negative
   */
  public function __construct(PhoneNumberUtil $util, $text, $country, AbstractLeniency $leniency, $maxTries)
  {
    if ($maxTries < 0) {
      throw new \InvalidArgumentException();
    }

    $this->phoneUtil = $util;
    $this->preferredRegion = $country;
    $this->leniency = $leniency;
    $this->maxTries = $maxTries;

    // A null text is treated as "nothing to search".
    $this->text = '';
    if ($text !== null) {
      $this->text = $text;
    }

    if (false === static::$initialized) {
      static::init();
    }
  }
  /**
   * Scans the text from $index onwards for the next run that parses as a phone number.
   *
   * Every rejected candidate consumes one of the remaining tries ($this->maxTries); the scan
   * stops once none are left or the pattern no longer matches.
   *
   * @param int $index The position at which to begin searching
   * @return PhoneNumberMatch|null The next match, or null when none is found
   */
  protected function find($index)
  {
    $scanner = new Matcher(static::$pattern, $this->text);

    for (;;) {
      if (!(($this->maxTries > 0) && $scanner->find($index))) {
        return null;
      }

      $start = $scanner->start();
      // Check for extra numbers at the end.
      // TODO: This is the place to start when trying to support extraction of multiple phone number
      // from split notations (+41 49 123 45 67 / 68).
      $candidate = static::trimAfterFirstMatch(
        PhoneNumberUtil::$SECOND_NUMBER_START_PATTERN,
        \mb_substr($this->text, $start, $scanner->end() - $start)
      );

      $found = $this->extractMatch($candidate, $start);
      if ($found !== null) {
        return $found;
      }

      // Resume right after the rejected candidate and use up one try.
      $index = $start + \mb_strlen($candidate);
      $this->maxTries--;
    }
  }
  /**
   * Cuts $candidate off at the position where $pattern first matches. When the pattern does
   * not match at all, the candidate is returned untouched.
   *
   * @param string $pattern The regular expression marking where unwanted text begins
   * @param string $candidate The text to shorten
   * @return string The part of $candidate that precedes the first match
   */
  protected static function trimAfterFirstMatch($pattern, $candidate)
  {
    $cutPointMatcher = new Matcher($pattern, $candidate);
    if (!$cutPointMatcher->find()) {
      return $candidate;
    }

    return \mb_substr($candidate, 0, $cutPointMatcher->start());
  }
  /**
   * Tells whether a character counts as a Latin-script letter. Combining marks are accepted as
   * well, on the assumption that they decorate a preceding Latin character.
   *
   * @param string $letter The character to classify
   * @return bool True for Latin letters and combining marks, false otherwise
   * @internal
   */
  public static function isLatinLetter($letter)
  {
    // Combining marks are a subset of non-spacing-mark.
    $isLetterOrMark = \preg_match('/\p{L}/u', $letter) === 1
      || \preg_match('/\p{Mn}/u', $letter) === 1;
    if (!$isLetterOrMark) {
      return false;
    }

    if (\preg_match('/\p{Latin}/u', $letter) === 1) {
      return true;
    }

    return \preg_match('/\pM+/u', $letter) === 1;
  }
  /**
   * Reports whether a character is a percent sign or a currency symbol (Unicode category Sc).
   *
   * @param string $character The character to test
   * @return bool True when the character is "%" or a currency symbol
   */
  protected static function isInvalidPunctuationSymbol($character)
  {
    if ($character == '%') {
      return true;
    }

    return (bool) \preg_match('/\p{Sc}/u', $character);
  }
  /**
   * Tries to turn a candidate into a phone number match, after discarding candidates that look
   * like slash-separated dates or timestamps.
   *
   * @param string $candidate The candidate text that might contain a phone number
   * @param int $offset Where $candidate starts within $this->text
   * @return PhoneNumberMatch|null The match, or null when the candidate yields none
   */
  protected function extractMatch($candidate, $offset)
  {
    // Skip a match that is more likely to be a date.
    $dateProbe = new Matcher(static::$slashSeparatedDates, $candidate);
    if ($dateProbe->find()) {
      return null;
    }

    // Skip potential time-stamps: the candidate ends like "YYYY-MM-DD HH" and the text right
    // after it continues with ":MM".
    $timeStampProbe = new Matcher(static::$timeStamps, $candidate);
    if ($timeStampProbe->find()) {
      $textAfterCandidate = \mb_substr($this->text, $offset + \mb_strlen($candidate));
      $suffixProbe = new Matcher(static::$timeStampsSuffix, $textAfterCandidate);
      if ($suffixProbe->lookingAt()) {
        return null;
      }
    }

    // Try the entire candidate first; if that fails, there might still be a phone number
    // somewhere within it.
    $wholeCandidateMatch = $this->parseAndVerify($candidate, $offset);

    return ($wholeCandidateMatch !== null)
      ? $wholeCandidateMatch
      : $this->extractInnerMatch($candidate, $offset);
  }
  /**
   * Looks for a phone number inside a candidate that did not qualify as a whole.
   *
   * Each pattern in static::$innerMatches is tried in turn as a separator. For the first
   * separator hit of a pattern the text before it is checked too; after that the captured
   * group of every hit is checked. Each failed check uses up one of the remaining tries.
   *
   * @param string $candidate The candidate text that might contain a phone number
   * @param int $offset Where $candidate starts within $this->text
   * @return PhoneNumberMatch|null The match, or null when none is found
   */
  protected function extractInnerMatch($candidate, $offset)
  {
    foreach (static::$innerMatches as $separatorPattern) {
      $separator = new Matcher($separatorPattern, $candidate);
      $leadingTextPending = true;

      while ($separator->find() && $this->maxTries > 0) {
        if ($leadingTextPending) {
          $leadingTextPending = false;

          // We should handle any group before this one too.
          $leadingText = static::trimAfterFirstMatch(
            PhoneNumberUtil::$UNWANTED_END_CHAR_PATTERN,
            \mb_substr($candidate, 0, $separator->start())
          );
          $leadingMatch = $this->parseAndVerify($leadingText, $offset);
          if ($leadingMatch !== null) {
            return $leadingMatch;
          }
          $this->maxTries--;
        }

        $capturedText = static::trimAfterFirstMatch(
          PhoneNumberUtil::$UNWANTED_END_CHAR_PATTERN,
          $separator->group(1)
        );
        $capturedMatch = $this->parseAndVerify($capturedText, $offset + $separator->start(1));
        if ($capturedMatch !== null) {
          return $capturedMatch;
        }
        $this->maxTries--;
      }
    }

    return null;
  }
  /**
   * Parses $candidate with PhoneNumberUtil::parseAndKeepRawInput() and checks the result against
   * the requested leniency. A PhoneNumberMatch is returned only when both steps succeed.
   *
   * @param string $candidate The candidate match
   * @param int $offset Where $candidate starts within $this->text
   * @return PhoneNumberMatch|null The parsed and validated match, or null
   */
  protected function parseAndVerify($candidate, $offset)
  {
    try {
      // Check the candidate doesn't contain any formatting which would indicate that it really
      // isn't a phone number
      $bracketProbe = new Matcher(static::$matchingBrackets, $candidate);
      $pubPagesProbe = new Matcher(static::$pubPages, $candidate);
      if (!$bracketProbe->matches()) {
        return null;
      }
      if ($pubPagesProbe->find()) {
        return null;
      }

      // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
      // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
      if ($this->leniency->compareTo(Leniency::VALID()) >= 0) {
        // If the candidate is not at the start of the text, and does not start with phone-number
        // punctuation, check the previous character.
        $leadProbe = new Matcher(static::$leadClass, $candidate);
        if ($offset > 0 && !$leadProbe->lookingAt()) {
          $charBefore = \mb_substr($this->text, $offset - 1, 1);
          // Reject when it is a Latin letter or an invalid punctuation symbol.
          if (static::isInvalidPunctuationSymbol($charBefore) || static::isLatinLetter($charBefore)) {
            return null;
          }
        }

        $indexAfterCandidate = $offset + \mb_strlen($candidate);
        if ($indexAfterCandidate < \mb_strlen($this->text)) {
          $charAfter = \mb_substr($this->text, $indexAfterCandidate, 1);
          if (static::isInvalidPunctuationSymbol($charAfter) || static::isLatinLetter($charAfter)) {
            return null;
          }
        }
      }

      $parsed = $this->phoneUtil->parseAndKeepRawInput($candidate, $this->preferredRegion);
      if (!$this->leniency->verify($parsed, $candidate, $this->phoneUtil)) {
        return null;
      }

      // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
      // values parsed. TODO: stop clearing all values here and switch all users over
      // to using rawInput() rather than the rawString() of PhoneNumberMatch
      $parsed->clearCountryCodeSource();
      $parsed->clearRawInput();
      $parsed->clearPreferredDomesticCarrierCode();

      return new PhoneNumberMatch($offset, $candidate, $parsed);
    } catch (NumberParseException $e) {
      // An unparseable candidate simply yields no match.
      return null;
    }
  }
  /**
   * Checks that every digit group of the formatted number appears, unbroken and in order, in
   * the normalized candidate.
   *
   * @param PhoneNumberUtil $util The phone number utility
   * @param PhoneNumber $number The parsed number the candidate was turned into
   * @param string $normalizedCandidate The candidate text with its digits normalized
   * @param string[] $formattedNumberGroups The digit groups of the formatted national number
   * @return bool True when no formatted group is split across candidate groupings
   */
  public static function allNumberGroupsRemainGrouped(
    PhoneNumberUtil $util,
    PhoneNumber $number,
    $normalizedCandidate,
    $formattedNumberGroups
  ) {
    $fromIndex = 0;
    if ($number->getCountryCodeSource() !== CountryCodeSource::FROM_DEFAULT_COUNTRY) {
      // First skip the country code if the normalized candidate contained it.
      $countryCode = (string) $number->getCountryCode();
      $fromIndex = \mb_strpos($normalizedCandidate, $countryCode) + \mb_strlen($countryCode);
    }

    // Check each group of consecutive digits are not broken into separate groupings in the
    // $normalizedCandidate string.
    $formattedNumberGroupsLength = \count($formattedNumberGroups);
    for ($i = 0; $i < $formattedNumberGroupsLength; $i++) {
      // Fails if the substring of $normalizedCandidate starting from $fromIndex
      // doesn't contain the consecutive digits in $formattedNumberGroups[$i].
      $fromIndex = \mb_strpos($normalizedCandidate, $formattedNumberGroups[$i], $fromIndex);
      if ($fromIndex === false) {
        return false;
      }

      // Moves $fromIndex forward.
      $fromIndex += \mb_strlen($formattedNumberGroups[$i]);
      if ($i === 0 && $fromIndex < \mb_strlen($normalizedCandidate)) {
        // We are at the position right after the NDC. We get the region used for formatting
        // information based on the country code in the phone number, rather than the number itself,
        // as we do not need to distinguish between different countries with the same country
        // calling code and this is faster.
        $region = $util->getRegionCodeForCountryCode($number->getCountryCode());
        // mb_substr() always returns a string, so the character has to be tested with a
        // digit pattern; an is_int() check on it can never succeed.
        $nextChar = \mb_substr($normalizedCandidate, $fromIndex, 1);
        if ($util->getNddPrefixForRegion($region, true) !== null
          && \preg_match('/^\p{Nd}$/u', $nextChar) === 1
        ) {
          // This means there is no formatting symbol after the NDC. In this case, we only
          // accept the number if there is no formatting symbol at all in the number, except
          // for extensions. This is only important for countries with national prefixes.
          $nationalSignificantNumber = $util->getNationalSignificantNumber($number);
          $groupStart = $fromIndex - \mb_strlen($formattedNumberGroups[$i]);

          // "Starts with" check: take exactly as many characters as the national significant
          // number has, beginning at the start of the first group.
          return \mb_substr(
            $normalizedCandidate,
            $groupStart,
            \mb_strlen($nationalSignificantNumber)
          ) === $nationalSignificantNumber;
        }
      }
    }

    // The check here makes sure that we haven't mistakenly already used the extension to
    // match the last group of the subscriber number. Note the extension cannot have
    // formatting in-between digits
    if ($number->hasExtension()) {
      return \mb_strpos(\mb_substr($normalizedCandidate, $fromIndex), $number->getExtension()) !== false;
    }

    return true;
  }
  /**
   * Checks that the digit groups in the candidate are exactly the groups of the formatted
   * number, comparing from the last group backwards.
   *
   * Groups are compared as strings, not as numbers, so that leading zeros are significant
   * ("012" is not the same group as "12").
   *
   * @param PhoneNumberUtil $util The phone number utility
   * @param PhoneNumber $number The parsed number the candidate was turned into
   * @param string $normalizedCandidate The candidate text with its digits normalized
   * @param string[] $formattedNumberGroups The digit groups of the formatted national number
   * @return bool True when the candidate's grouping matches the formatted grouping
   */
  public static function allNumberGroupsAreExactlyPresent(
    PhoneNumberUtil $util,
    PhoneNumber $number,
    $normalizedCandidate,
    $formattedNumberGroups
  ) {
    $candidateGroups = \preg_split(PhoneNumberUtil::NON_DIGITS_PATTERN, $normalizedCandidate);
    $candidateGroupCount = \count($candidateGroups);

    // Set this to the last group, skipping it if the number has an extension.
    $candidateNumberGroupIndex = $number->hasExtension() ? $candidateGroupCount - 2 : $candidateGroupCount - 1;

    // First we check if the national significant number is formatted as a block.
    // We use contains and not equals, since the national significant number may be present with
    // a prefix such as a national number prefix, or the country code itself.
    if ($candidateGroupCount === 1
      || \mb_strpos(
        $candidateGroups[$candidateNumberGroupIndex],
        $util->getNationalSignificantNumber($number)
      ) !== false
    ) {
      return true;
    }

    // Starting from the end, go through in reverse, excluding the first group, and check the
    // candidate and number groups are the same.
    for ($formattedNumberGroupIndex = (\count($formattedNumberGroups) - 1);
      $formattedNumberGroupIndex > 0 && $candidateNumberGroupIndex >= 0;
      $formattedNumberGroupIndex--, $candidateNumberGroupIndex--) {
      // Strict string comparison: a loose comparison of two digit strings is numeric in PHP.
      if ((string) $candidateGroups[$candidateNumberGroupIndex]
        !== (string) $formattedNumberGroups[$formattedNumberGroupIndex]
      ) {
        return false;
      }
    }

    // Now check the first group. There may be a national prefix at the start, so we only check
    // that the candidate group ends with the formatted number group.
    if ($candidateNumberGroupIndex < 0) {
      return false;
    }
    $firstFormattedGroup = (string) $formattedNumberGroups[0];

    return \mb_substr(
      $candidateGroups[$candidateNumberGroupIndex],
      -\mb_strlen($firstFormattedGroup)
    ) === $firstFormattedGroup;
  }
  /**
   * Returns the national-number part of a number as the list of digit blocks that would be
   * formatted together, without any national prefix.
   *
   * @param PhoneNumberUtil $util The phone number utility used for formatting
   * @param PhoneNumber $number The number to split into groups
   * @param NumberFormat $formattingPattern Optional format to apply to the national significant
   * number; when null the number's RFC3966 formatting is used
   * @return string[] The digit groups
   */
  protected static function getNationalNumberGroups(
    PhoneNumberUtil $util,
    PhoneNumber $number,
    NumberFormat $formattingPattern = null
  ) {
    if ($formattingPattern !== null) {
      // If a format is provided, we format the NSN only, and split that according to the separator.
      $nsn = $util->getNationalSignificantNumber($number);
      $formattedNsn = $util->formatNsnUsingPattern($nsn, $formattingPattern, PhoneNumberFormat::RFC3966);

      return \explode('-', $formattedNsn);
    }

    // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
    $formatted = $util->format($number, PhoneNumberFormat::RFC3966);

    // We remove the extension part from the formatted string before splitting it into different
    // groups.
    $semicolonIndex = \mb_strpos($formatted, ';');
    $groupsEnd = ($semicolonIndex === false) ? \mb_strlen($formatted) : $semicolonIndex;

    // The country-code will have a '-' following it.
    $groupsStart = \mb_strpos($formatted, '-') + 1;
    $groupsText = \mb_substr($formatted, $groupsStart, $groupsEnd - $groupsStart);

    return \explode('-', $groupsText);
  }
  /**
   * Runs a grouping check against the number's standard formatting and, if that fails, against
   * each applicable alternate format for the number's country calling code.
   *
   * @param PhoneNumber $number The parsed number
   * @param string $candidate The raw candidate text the number was parsed from
   * @param PhoneNumberUtil $util The phone number utility
   * @param \Closure $checker Called as ($util, $number, $normalizedCandidate, $formattedNumberGroups);
   * its result is used as a boolean
   * @return bool True as soon as the checker accepts one of the groupings
   */
  public static function checkNumberGroupingIsValid(
    PhoneNumber $number,
    $candidate,
    PhoneNumberUtil $util,
    \Closure $checker
  ) {
    $normalizedCandidate = PhoneNumberUtil::normalizeDigits($candidate, true /* keep non-digits */);

    $standardGroups = static::getNationalNumberGroups($util, $number);
    if ($checker($util, $number, $normalizedCandidate, $standardGroups)) {
      return true;
    }

    // If this didn't pass, see if there are any alternative formats that match, and try them instead.
    $alternateFormats = static::getAlternateFormatsForCountry($number->getCountryCode());
    $nsn = $util->getNationalSignificantNumber($number);
    if ($alternateFormats === null) {
      return false;
    }

    foreach ($alternateFormats->numberFormats() as $alternateFormat) {
      if ($alternateFormat->leadingDigitsPatternSize() > 0) {
        // There is only one leading digits pattern for alternate formats.
        $leadingDigits = new Matcher($alternateFormat->getLeadingDigitsPattern(0), $nsn);
        if (!$leadingDigits->lookingAt()) {
          // Leading digits don't match; try another one.
          continue;
        }
      }

      $alternateGroups = static::getNationalNumberGroups($util, $number, $alternateFormat);
      if ($checker($util, $number, $normalizedCandidate, $alternateGroups)) {
        return true;
      }
    }

    return false;
  }
  /**
   * Reports whether the candidate has more slashes than are tolerated. Zero or one slash is
   * always fine; one additional slash is allowed when the first one directly follows the
   * country calling code.
   *
   * @param PhoneNumber $number The parsed number
   * @param string $candidate The raw candidate text
   * @return bool True when the candidate contains too many slashes
   */
  public static function containsMoreThanOneSlashInNationalNumber(PhoneNumber $number, $candidate)
  {
    $firstSlash = \mb_strpos($candidate, '/');
    if ($firstSlash === false) {
      // No slashes, this is okay
      return false;
    }

    // Now look for a second one.
    $secondSlash = \mb_strpos($candidate, '/', $firstSlash + 1);
    if ($secondSlash === false) {
      // Only one slash, this is okay
      return false;
    }

    // If the first slash is after the country calling code, this is permitted
    $source = $number->getCountryCodeSource();
    $writtenWithCountryCode = \in_array(
      $source,
      array(
        CountryCodeSource::FROM_NUMBER_WITH_PLUS_SIGN,
        CountryCodeSource::FROM_NUMBER_WITHOUT_PLUS_SIGN,
      ),
      true
    );
    if (!$writtenWithCountryCode) {
      return true;
    }

    $digitsBeforeFirstSlash = PhoneNumberUtil::normalizeDigitsOnly(\mb_substr($candidate, 0, $firstSlash));
    if ($digitsBeforeFirstSlash == $number->getCountryCode()) {
      // Any more slashes and this is illegal
      $afterSecondSlash = \mb_substr($candidate, $secondSlash + 1);

      return \mb_strpos($afterSecondSlash, '/') !== false;
    }

    return true;
  }
  /**
   * Checks that every 'x' / 'X' in the candidate is used in one of the two accepted ways.
   *
   * The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
   * national significant number or (2) an extension sign, in which case they always precede the
   * extension number. We assume a carrier code is more than 1 digit, so the first case has to
   * have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
   * or 'X'. We ignore the character if it appears as the last character of the string.
   *
   * @param PhoneNumber $number The parsed number
   * @param string $candidate The raw candidate text
   * @param PhoneNumberUtil $util The phone number utility
   * @return bool False as soon as an 'x' / 'X' is found that fits neither case
   */
  public static function containsOnlyValidXChars(PhoneNumber $number, $candidate, PhoneNumberUtil $util)
  {
    $candidateLength = \mb_strlen($candidate);
    for ($index = 0; $index < $candidateLength - 1; $index++) {
      $charAtIndex = \mb_substr($candidate, $index, 1);
      if ($charAtIndex !== 'x' && $charAtIndex !== 'X') {
        continue;
      }

      $charAtNextIndex = \mb_substr($candidate, $index + 1, 1);
      if ($charAtNextIndex === 'x' || $charAtNextIndex === 'X') {
        // This is the carrier code case, in which the 'X's always precede the national
        // significant number.
        $index++;
        if ($util->isNumberMatch($number, \mb_substr($candidate, $index)) != MatchType::NSN_MATCH) {
          return false;
        }
        continue;
      }

      // This is the extension sign case, in which the 'x' or 'X' should always precede the
      // extension number. Compare the digits first and negate the comparison as a whole:
      // "!" binds tighter than "==", so negating the left operand alone would test the wrong thing.
      $digitsAfterX = (string) PhoneNumberUtil::normalizeDigitsOnly(\mb_substr($candidate, $index));
      if ($digitsAfterX !== (string) $number->getExtension()) {
        return false;
      }
    }

    return true;
  }
  /**
   * Checks whether the raw input of a number carries a national prefix when one is required.
   *
   * The check only applies to numbers whose country code came from the default country and
   * whose chosen formatting pattern has a national prefix formatting rule that is neither
   * optional nor limited to the first group; in every other case true is returned.
   *
   * @param PhoneNumber $number the parsed number, including its raw input
   * @param PhoneNumberUtil $util utility used for metadata lookup and prefix stripping
   * @return bool the result of stripping the national prefix / carrier code from the raw input
   *              when the prefix is required, true otherwise
   */
  public static function isNationalPrefixPresentIfRequired(PhoneNumber $number, PhoneNumberUtil $util)
  {
      // A number written in international format does not need a national prefix.
      if ($number->getCountryCodeSource() !== CountryCodeSource::FROM_DEFAULT_COUNTRY) {
          return true;
      }

      $regionCode = $util->getRegionCodeForCountryCode($number->getCountryCode());
      $regionMetadata = $util->getMetadataForRegion($regionCode);
      if ($regionMetadata === null) {
          return true;
      }

      // Find the pattern that would be used to format this number.
      $nationalSignificantNumber = $util->getNationalSignificantNumber($number);
      $pattern = $util->chooseFormattingPatternForNumber(
          $regionMetadata->numberFormats(),
          $nationalSignificantNumber
      );

      // Without a pattern, or without a national prefix formatting rule, nothing is required.
      if ($pattern === null || $pattern->getNationalPrefixFormattingRule() === '') {
          return true;
      }

      // The national prefix is optional for this pattern, so its presence is not checked.
      if ($pattern->getNationalPrefixOptionalWhenFormatting()) {
          return true;
      }

      // The rule is just the first-group symbol ($1) with punctuation: no prefix needed.
      if (PhoneNumberUtil::formattingRuleHasFirstGroupOnly($pattern->getNationalPrefixFormattingRule())) {
          return true;
      }

      // Normalize the raw input, then report whether a national prefix and/or carrier code
      // was found at its start. Both variables are handed over as plain variables because
      // the original passes them the same way.
      $normalizedRawInput = PhoneNumberUtil::normalizeDigitsOnly($number->getRawInput());
      $strippedCarrierCode = null;

      return $util->maybeStripNationalPrefixAndCarrierCode(
          $normalizedRawInput,
          $regionMetadata,
          $strippedCarrierCode
      );
  }
  /**
   * Cache of alternate-format metadata that has already been loaded, indexed by country
   * calling code.
   *
   * @var PhoneMetadata[]
   */
  protected static $callingCodeToAlternateFormatsMap = array();

  /**
   * Returns the alternate-format metadata for a country calling code, loading it on first use.
   *
   * @param $countryCallingCode country calling code to look up
   * @return PhoneMetadata|null the metadata, or null when the calling code is not listed in
   *                            the alternate-formats country code set
   */
  protected static function getAlternateFormatsForCountry($countryCallingCode)
  {
      $hasAlternateFormats = \in_array(
          $countryCallingCode,
          AlternateFormatsCountryCodeSet::$alternateFormatsCountryCodeSet
      );
      if ($hasAlternateFormats === false) {
          return null;
      }

      // Load the metadata file only the first time this calling code is requested.
      if (isset(static::$callingCodeToAlternateFormatsMap[$countryCallingCode]) === false) {
          static::loadAlternateFormatsMetadataFromFile($countryCallingCode);
      }

      return static::$callingCodeToAlternateFormatsMap[$countryCallingCode];
  }
  /**
   * Reads the alternate-format metadata file of a country calling code and stores the
   * resulting PhoneMetadata in the static cache under that calling code.
   *
   * The file name is the alternate-formats file prefix, an underscore, the calling code and
   * the ".php" extension.
   *
   * @param string $countryCallingCode country calling code whose metadata file is loaded
   * @throws \Exception when the metadata file is not readable
   */
  protected static function loadAlternateFormatsMetadataFromFile($countryCallingCode)
  {
      $metadataFile = static::$alternateFormatsFilePrefix . '_' . $countryCallingCode . '.php';

      if (\is_readable($metadataFile) === false) {
          throw new \Exception('missing metadata: ' . $metadataFile);
      }

      $loader = new DefaultMetadataLoader();
      $rawMetadata = $loader->loadMetadata($metadataFile);

      $alternateFormats = new PhoneMetadata();
      $alternateFormats->fromArray($rawMetadata);

      static::$callingCodeToAlternateFormatsMap[$countryCallingCode] = $alternateFormats;
  }
  /**
   * Iterator::current() - returns the match stored by the most recent call to next().
   *
   * @link http://php.net/manual/en/iterator.current.php
   * @return PhoneNumberMatch|null the last match, or null when the last search found nothing
   */
  #[\ReturnTypeWillChange]
  public function current()
  {
      return $this->lastMatch;
  }
  /**
   * Iterator::next() - searches for the next match, starting at the current search index.
   *
   * When a match is found the state becomes 'READY' and the search index moves to the end
   * of that match; otherwise the state becomes 'DONE'. In both cases the search index is
   * then advanced by one.
   *
   * @link http://php.net/manual/en/iterator.next.php
   * @return void Any returned value is ignored.
   */
  #[\ReturnTypeWillChange]
  public function next()
  {
      $match = $this->find($this->searchIndex);
      $this->lastMatch = $match;

      if ($match !== null) {
          $this->searchIndex = $match->end();
          $this->state = 'READY';
      } else {
          $this->state = 'DONE';
      }

      $this->searchIndex++;
  }
  /**
   * Iterator::key() - uses the current search index as the key of the current element.
   *
   * @link http://php.net/manual/en/iterator.key.php
   * @return mixed the current value of the search index
   * @since 5.0.0
   */
  #[\ReturnTypeWillChange]
  public function key()
  {
      return $this->searchIndex;
  }
  /**
   * Iterator::valid() - tells whether the iterator currently points at a match.
   *
   * @link http://php.net/manual/en/iterator.valid.php
   * @return boolean true while the state is 'READY' (set by next() when a match was found),
   *                 false otherwise
   * @since 5.0.0
   */
  #[\ReturnTypeWillChange]
  public function valid()
  {
      return 'READY' === $this->state;
  }
  /**
   * Iterator::rewind() - restarts the search at index 0 and looks for the first match.
   *
   * @link http://php.net/manual/en/iterator.rewind.php
   * @return void Any returned value is ignored.
   * @since 5.0.0
   */
  #[\ReturnTypeWillChange]
  public function rewind()
  {
      // Reset the position, then let next() find the first match and set the state.
      $this->searchIndex = 0;
      $this->next();
  }
  831. }