How to detect unicode characters that are non-normalized? - unicode

Given a UTF-8 string (&str), I want to find out any range of characters that are not normalized (e.g. a\u{300} instead of \u{e0}).
How do I do this?
Edit: Thanks to DK for correcting my faulty UTF-8 sequence. The combining character comes after the a, not before.

Edit: I just realised that the reason for the results I was getting is that your example string is backwards. The combining code point should come second, not first. I've updated the answer accordingly.
Well, that depends on the definition of "normalized".
For example:
/*!
Add this to a `Cargo.toml` manifest:
```cargo
[dependencies]
unicode-normalization = "0.1.1"
```
*/
extern crate unicode_normalization;
/// Runs all four normalization checks against a decomposed spelling
/// (`a` + U+0300) and a precomposed spelling (U+00E0) of the same letter.
fn main() {
    // A borrowed array is enough for a fixed list that is only iterated;
    // `vec!` would heap-allocate for no benefit.
    for test_str in &["a\u{300}", "\u{e0}"] {
        is_nfd(test_str);
        is_nfkd(test_str);
        is_nfc(test_str);
        is_nfkc(test_str);
    }
}
/// Generates `fn $fn_name(s: &str)`, which prints a code-point-by-code-point
/// comparison of `s` with the result of `s.$norm_name()` and then whether the
/// two sequences are identical (i.e. whether `s` is already in that form).
macro_rules! norm_test {
    ($fn_name:ident, $norm_name:ident) => {
        fn $fn_name(s: &str) {
            use unicode_normalization::UnicodeNormalization;
            println!("is_{}({:?}):", stringify!($norm_name), s);
            // Walk both sequences in lock-step, printing each pair and
            // stopping at the first mismatch.
            let pairs_match = s
                .chars()
                .zip(s.$norm_name())
                .inspect(|&(a, b)| println!(" - ({:x}, {:x})", a as u32, b as u32))
                .all(|(a, b)| a == b);
            // `zip` ends with the shorter side, so a matching prefix alone does
            // not prove equality: the lengths must agree as well.
            let is_norm = pairs_match && s.chars().count() == s.$norm_name().count();
            println!(" is_norm: {}", is_norm);
        }
    };
}
norm_test! { is_nfd, nfd }
norm_test! { is_nfkd, nfkd }
norm_test! { is_nfc, nfc }
norm_test! { is_nfkc, nfkc }
This produces the following output:
is_nfd("a\u{300}"):
- (61, 61)
- (300, 300)
is_norm: true
is_nfkd("a\u{300}"):
- (61, 61)
- (300, 300)
is_norm: true
is_nfc("a\u{300}"):
- (61, e0)
is_norm: false
is_nfkc("a\u{300}"):
- (61, e0)
is_norm: false
is_nfd("\u{e0}"):
- (e0, 61)
is_norm: false
is_nfkd("\u{e0}"):
- (e0, 61)
is_norm: false
is_nfc("\u{e0}"):
- (e0, e0)
is_norm: true
is_nfkc("\u{e0}"):
- (e0, e0)
is_norm: true
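So "a\u{300}" is NFD and NFKD, whilst "\u{e0}" is NFC and NFKC. These two strings do not distinguish the K and non-K variants; a compatibility character does: for example "\u{fb01}" (the "ﬁ" ligature) is left unchanged by NFC and NFD but becomes "fi" under NFKC and NFKD. The Unicode FAQ on Normalization will probably explain things better than I can.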

Related

Is there such function like IsCallable?

I'd like to have in AutoHotkey function named, for example IsCallable, that can tell me whether an object (or anything that can be stored in AHK variable) is callable.
A callable object includes:
a normal function represented as a string.
a function object by Func("somefuncname").
a BoundFunc Object.
The key point is, if fnobj is callable, then the user can write %fnobj%(...) to actually call something.
Clarify my idea with code below:
; Demo for the question: builds each kind of callable value, invokes it, and
; lists the results that IsCallable() is hoped to return for each.
test_check_callable()
{
; Function name held as a plain string.
fnstr := "fnhello"
; Func object for the same function.
fnobjA := Func("fnhello")
; BoundFunc object made by binding a first argument.
fnobjB := fnobjA.bind("0th")
; Second variable holding the same BoundFunc value.
fnobjC := fnobjB
; Each of the values above is invoked dynamically.
%fnstr%()
%fnobjA%("A1")
%fnobjB%("B1")
fnobjC.("C1", "C2")
; Desired results: every callable value yields true ...
r1 := IsCallable(fnstr) ; true
r2 := IsCallable(fnobjA) ; true
r3 := IsCallable(fnobjB) ; true
r4 := IsCallable(fnobjC) ; true
; ... while an unknown function name and a plain object yield false.
e1 := IsCallable("NoSuch") ; false
e2 := IsCallable({key1:"value1"}) ; false
}
; Variadic helper: reports how many parameters it received, then each
; parameter together with its index, via Dbgwin_Output().
fnhello(args*)
{
Dbgwin_Output("fnhello() parameters: " args.Length())
for i,arg in args
{
Dbgwin_Output(Format(" param{} = {}", i, arg))
}
}
; Placeholder for the function being asked about: it should report whether
; pobj can be invoked as %pobj%(...). The body contains no statements yet.
IsCallable(pobj)
{
; How to implement this? IsFunc? IsObject?
}
I hope r1, r2, r3, r4 should all get true, and e1, e2 get false.
I'm using Autohotkey 1.1.36.2 .
PS: Dbgwin_Output() is implemented here: https://github.com/chjfth/AmHotkey/blob/master/libs/debugwin.ahk
If you used AHKv2, you could make use of HasMethod. I'd recommend the usage of AHKv2, it's already at the RC3 stage.
Something like this should work pretty well to cover all basic use cases:
; --- Callable test values ---
; Function name held as a string.
fnStr := "fnHello"
; Dereference the name stored in fnStr to obtain the function itself.
fnObjA := %fnStr%
; Bound function created from fnObjA with a first argument pre-filled.
fnObjB := fnobjA.bind("0th")
; Alias of the bound function.
fnObjC := fnObjB
; Class whose instances define a Call method, which makes them callable.
class HelloClass
{
Call() => MsgBox("Hello")
}
fnClass := HelloClass()
; Class whose instances define nothing, so they are not callable.
class NotCallable
{
}
; --- Non-callable test values ---
no1 := "NoSuch"
no2 := {key: "value"}
classNo := NotCallable()
; Show the IsCallable() result for every test value, one per line.
MsgBox(
"`"fnHello`": " IsCallable("fnHello") "`n" ;support pure strings
"fnStr: " IsCallable(fnStr) "`n" ;support string objects
"fnObjA: " IsCallable(fnObjA) "`n" ;support user-defined function objects
"fnObjB: " IsCallable(fnObjB) "`n" ;support bound function objects
"fnObjC: " IsCallable(fnObjC) "`n" ;same object as fnObjB (alias)
"`"MsgBox`": " IsCallable("MsgBox") "`n" ;support built-in functions as pure strings
"MsgBox: " IsCallable(MsgBox) "`n" ;support built-in functions
"fnClass: " IsCallable(fnClass) "`n`n" ;support user defined classes
"`"NoSuch`": " IsCallable("NoSuch") "`n"
"no1: " IsCallable(no1) "`n"
"no2: " IsCallable(no2) "`n"
"classNo: " IsCallable(classNo) "`n"
)
; Sample user-defined function: shows "hi " followed by the optional parameter.
fnHello(param := "")
{
MsgBox("hi " param)
}
; Returns true when inp can be called, false otherwise.
; inp may be a callable value itself or a string naming one.
IsCallable(inp)
{
; Fast path: the value itself is already callable.
if (HasMethod(inp))
return true
; Otherwise treat inp as a name and try to resolve it.
try
{
inp := %inp%
}
catch
{
; The dereference failed, so inp neither is nor names anything callable.
return false
}
; The name resolved to something; report whether that value is callable.
return HasMethod(inp)
}
Result:
"fnHello": 1
fnStr: 1
fnObjA: 1
fnObjB: 1
fnObjC: 1
"MsgBox": 1
MsgBox: 1
fnClass: 1
"NoSuch": 0
no1: 0
no2: 0
classNo: 0

How to natural-sort a varchar column with Doctrine

I have a table with a column that contains names (varchar) but some names have numbers inside and the ordering is not the expected.
I have something like:
Courlis
D11 Espadon
D13 Balbuzard
D1 empacher
D2
But I expect:
Courlis
D1 empacher
D2
D11 Espadon
D13 Balbuzard
I've found a lot of tips about this, but they always deal with ordering numbers that are stored as strings: add 0 to convert the value to a number, check the length of the string to place 1 before 10, etc. In my case that can't work, because the numbers are embedded in names.
I can't use SQL query, because I use it in a form of a Symfony application that need a QueryBuilder.
Here is a way to do this with ICU collations in PostgreSQL (available from v10 on):
-- Natural-sort collation: the "kn-true" locale extension asks ICU to compare
-- runs of digits by their numeric value instead of character by character.
CREATE COLLATION en_natural (
LOCALE = 'en-US-u-kn-true',
PROVIDER = 'icu'
);
-- Column declared with that collation, so a plain ORDER BY on it sorts naturally.
CREATE TABLE atable (textcol text COLLATE en_natural);
COPY atable FROM STDIN;
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF signal.
>> Courlis
>> D11 Espadon
>> D13 Balbuzard
>> D1 empacher
>> D2
>> \.
test=# SELECT textcol FROM atable ORDER BY textcol;
textcol
---------------
Courlis
D1 empacher
D2
D11 Espadon
D13 Balbuzard
(5 rows)
Thanks to Laurenz Albe for the answer. Here is a step-by-step guide for a Symfony application:
Create a migration file that creates the custom collation
<?php
declare(strict_types=1);
namespace DoctrineMigrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Creates (up) and drops (down) the `fr_natural` ICU collation, which enables
 * natural ordering of strings containing numbers (`kn-true` = numeric ordering).
 *
 * Both directions abort unless the connection platform is PostgreSQL.
 */
final class Version20200221215657 extends AbstractMigration
{
    public function getDescription() : string
    {
        return '';
    }

    /**
     * Creates the collation.
     */
    public function up(Schema $schema) : void
    {
        // this up() migration is auto-generated, please modify it to your needs
        $this->abortIf($this->connection->getDatabasePlatform()->getName() !== 'postgresql', 'Migration can only be executed safely on \'postgresql\'.');

        // Option values use the documented form: a bare provider name and a
        // single-quoted string literal (double quotes denote identifiers in SQL).
        $this->addSql("CREATE COLLATION fr_natural (provider = icu, locale = 'fr-u-kn-true');");
    }

    /**
     * Drops the collation created by up().
     */
    public function down(Schema $schema) : void
    {
        // this down() migration is auto-generated, please modify it to your needs
        $this->abortIf($this->connection->getDatabasePlatform()->getName() !== 'postgresql', 'Migration can only be executed safely on \'postgresql\'.');

        $this->addSql('DROP COLLATION fr_natural');
    }
}
Create a Doctrine Custom Function (Collate DQL)
<?php
namespace App\DQL;
use Doctrine\ORM\Query\AST\Functions\FunctionNode;
use Doctrine\ORM\Query\Lexer;
/**
 * Custom DQL function `COLLATE(expression, collation)`.
 *
 * Rendered to SQL as `<expression> COLLATE <collation>`.
 */
class Collate extends FunctionNode
{
// Parsed DQL expression the collation is applied to.
public $expressionToCollate = null;
// Collation name, taken from the second argument.
public $collation = null;
/**
 * Consumes `NAME ( string-primary , identifier )` from the DQL token stream
 * and stores the two arguments on this node.
 */
public function parse(\Doctrine\ORM\Query\Parser $parser)
{
$parser->match(Lexer::T_IDENTIFIER);
$parser->match(Lexer::T_OPEN_PARENTHESIS);
$this->expressionToCollate = $parser->StringPrimary();
$parser->match(Lexer::T_COMMA);
$parser->match(Lexer::T_IDENTIFIER);
// Read the collation name from the lexer's current token, immediately
// after the identifier match above (presumably the identifier just consumed).
$lexer = $parser->getLexer();
$this->collation = $lexer->token['value'];
$parser->match(Lexer::T_CLOSE_PARENTHESIS);
}
/**
 * Builds the SQL fragment: the walked expression followed by
 * `COLLATE` and the stored collation name.
 */
public function getSql(\Doctrine\ORM\Query\SqlWalker $sqlWalker)
{
return sprintf( '%s COLLATE %s', $sqlWalker->walkStringPrimary($this->expressionToCollate), $this->collation );
}
}
Register the new Function in the Doctrine config
doctrine:
    dbal:
        url: '%env(resolve:DATABASE_URL)%'

        # IMPORTANT: You MUST configure your server version,
        # either here or in the DATABASE_URL env var (see .env file)
        server_version: '12'
    orm:
        auto_generate_proxy_classes: true
        naming_strategy: doctrine.orm.naming_strategy.underscore_number_aware
        auto_mapping: true
        mappings:
            App:
                is_bundle: false
                type: annotation
                dir: '%kernel.project_dir%/src/Entity'
                prefix: 'App\Entity'
                alias: App
        # Register the custom DQL function under the name COLLATE.
        dql:
            string_functions:
                collate: App\DQL\Collate
And finally, just use it in the query when needed
// Order by shell.name through the custom COLLATE DQL function, using the
// fr_natural collation, in ascending order.
$query = $this->createQueryBuilder('shell')
->orderBy('COLLATE(shell.name, fr_natural)', 'ASC')
->getQuery();
return $query->getResult();

Is there a way to convert juniper "json" or "xml" config to "set" or "show" config?

We use juniper hardware with junos version 15. In this version we can export our config as "json" or "xml" which we want to use to edit it with our automation tooling.
Importing however is only possible in "set" or "show" format.
Is there a tool to convert "json" or "xml" format to "set" or "show" format?
I can only find converters between "show" and "set".
We can't upgrade to version 16 where the import of "json" would be possible.
Here's a script I made at work. Put it in your bin directory and run it either by providing a filename or by piping the configuration into it. This assumes Linux or macOS so that the os.isatty function works, but the logic can work anywhere:
usage demo:
person#laptop ~ > head router.cfg
## Last commit: 2021-04-20 21:21:39 UTC by vit
version 15.1X12.2;
groups {
BACKBONE-PORT {
interfaces {
<*> {
mtu 9216;
unit <*> {
family inet {
mtu 9150;
person#laptop ~ > convert.py router.cfg | head
set groups BACKBONE-PORT interfaces <*> mtu 9216
set groups BACKBONE-PORT interfaces <*> unit <*> family inet mtu 9150
set groups BACKBONE-PORT interfaces <*> unit <*> family inet6 mtu 9150
set groups BACKBONE-PORT interfaces <*> unit <*> family mpls maximum-labels 5
<... output removed... >
convert.py:
#!/usr/bin/env python3
# Class that attempts to parse out Juniper JSON into set format
# I think it works? still testing
#
# TODO:
# accumulate annotations and provide them as commands at the end. Will be weird as annotations have to be done after an edit command
from argparse import ArgumentParser, RawTextHelpFormatter
import sys, os, re
# ANSI escape sequences used to highlight terminal output.
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
B = "\033[1m"
WHITE = "\033[0m"


def die(msg):
    """Print a highlighted fatal-error message and exit with status 1."""
    print(f"\n{B}{RED}FATAL ERROR{WHITE}: {msg}")
    sys.exit(1)


class TokenStack():
    """Stack of the configuration hierarchy levels that are currently open."""

    def __init__(self):
        self._tokens = []

    def push(self, token):
        """Enter a hierarchy level."""
        self._tokens.append(token)

    def pop(self):
        """Leave the innermost level and return it, or None if none is open."""
        if not self._tokens:
            return None
        return self._tokens.pop()

    def peek(self):
        """Return the innermost level without removing it, or None if empty."""
        if not self._tokens:
            return None
        return self._tokens[-1]

    def __str__(self):
        # The open levels, outermost first, form the path of a set command.
        return " ".join(self._tokens)

    __repr__ = __str__


def _command(verb, tokens, statement):
    """Join verb, current hierarchy path and statement, skipping empty parts."""
    return " ".join(part for part in (verb, str(tokens), statement) if part)


def main():
    """Convert a curly-brace Juniper configuration into "set" commands.

    The configuration is read from the file named on the command line, or from
    standard input when no file is given. The resulting commands are printed to
    standard output: all "set" commands first, then the "deactivate" commands
    for statements that were marked "inactive:". Exits with status 1 on
    unreadable input or unbalanced braces.
    """
    # get file
    a = ArgumentParser(prog="convert_jpr_json",
                       description="This program takes in Juniper style JSON (blah { format) and prints it in a copy pastable display set format",
                       epilog=f"Either supply with a filename or pipe config contents into this program and it'll print out the display set view.\nEx:\n{B}convert_jpr_json <FILENAME>\ncat <FILENAME> | convert_jpr_json{WHITE}",
                       formatter_class=RawTextHelpFormatter)
    a.add_argument('file', help="juniper config in JSON format", nargs="?")
    args = a.parse_args()
    if not args.file and os.isatty(0):
        a.print_help()
        die("Please supply filename or provide piped input")

    file_contents = None
    if args.file:
        try:
            # Context manager so the handle is closed even if reading fails.
            with open(args.file, "r") as handle:
                file_contents = handle.readlines()
        except IOError as e:
            die(f"Issue opening file {args.file}: {e}")
    else:
        file_contents = sys.stdin.readlines()

    tokens = TokenStack()
    in_comment = False
    new_config = []
    # Deactivations are emitted after every set command so that the statement
    # they refer to already exists when the output is pasted.
    deactivations = []
    # Count from 1 so reported line numbers match what an editor shows.
    for line_num, line in enumerate(file_contents, start=1):
        if line.startswith("version "):
            continue
        # Drop a trailing "# ..." remark, then surrounding whitespace.
        token = re.sub(r"^(.+?)#+[^\"]*$", r"\1", line.strip()).strip()
        if not token:
            # Blank line (readlines() keeps the newline, so test after strip).
            continue
        if in_comment:
            # Inside a multi-line /* ... */ comment: skip up to its closing line.
            if "*/" in token:
                in_comment = False
            continue
        if token.startswith(("!", "#")):
            # annotations currently not supported
            continue
        if token.startswith("/*"):
            # The comment continues on later lines unless it closes right here.
            in_comment = "*/" not in token
            continue
        if "inactive: " in token:
            token = token.split("inactive: ")[1]
            # The deactivate target is the statement without its "{" or ";".
            deactivations.append(_command("deactivate", tokens, token.rstrip("{; ")))
        if token[-1] == "{":
            tokens.push(token.strip("{ "))
        elif token[-1] == "}":
            if not tokens.pop():
                die("Invalid json supplied: unmatched closing } encountered on line " + f"{line_num}")
        elif token[-1] == ";":
            new_config.append(_command("set", tokens, token[:-1]))

    if tokens.peek():
        print(tokens)
        die("Unbalanced JSON: expected closing }, but encountered EOF")
    print("\n".join(new_config + deactivations))


if __name__ == "__main__":
    main()
You can load XML configuration using edit-config RPC or load-configuration RPC. For more details:
https://www.juniper.net/documentation/en_US/junos/topics/reference/tag-summary/netconf-edit-config.html
https://www.juniper.net/documentation/en_US/junos/topics/reference/tag-summary/junos-xml-protocol-load-configuration.html
XML content can be loaded via an "op" script by placing the content inside a call to the jcs:load-configuration template defined in "junos.xsl". Something like the following:
version 1.1;
ns jcs = "http://xml.juniper.net/junos/commit-scripts/1.0";
import "../import/junos.xsl";
/* Arguments this op script advertises: the file to load and the load mode. */
var $arguments = {
<argument> {
<name> "file";
<description> "Filename of XML content to load";
}
<argument> {
<name> "action";
<description> "Mode for the load (override, replace, merge)";
}
}
/* Script parameters; $action falls back to "replace" when it is not supplied. */
param $file;
param $action = "replace";
/*
 * Entry point: read the XML document named by $file, open a connection, and
 * load the document into the configuration using the mode given by $action.
 * NOTE(review): the connection is not closed in this snippet; confirm whether
 * an explicit close is required.
 */
match / {
<op-script-results> {
var $configuration = slax:document($file);
var $connection = jcs:open();
call jcs:load-configuration($connection, $configuration, $action);
}
}
Thanks,
Phil

Emoji value range

I was trying to take out all emoji chars out of a string (like a sanitizer). But I cannot find a complete set of emoji values.
What is the complete set of emoji chars' UTF16 values?
The Unicode standard's Unicode® Technical Report #51 includes a list of emoji (emoji-data.txt):
...
21A9 ; text ; L1 ; none ; j # V1.1 (↩) LEFTWARDS ARROW WITH HOOK
21AA ; text ; L1 ; none ; j # V1.1 (↪) RIGHTWARDS ARROW WITH HOOK
231A ; emoji ; L1 ; none ; j # V1.1 (⌚) WATCH
231B ; emoji ; L1 ; none ; j # V1.1 (⌛) HOURGLASS
...
I believe you would want to remove each character listed in this document which had a Default_Emoji_Style of emoji.
There is no way, other than reference to a definition list like this, to identify the emoji characters in Unicode. As the reference to the FAQ says, they are spread throughout different blocks.
I have composed list based on Joe's and Doctor.Who's answers:
U+00A9, U+00AE, U+203C, U+2049, U+20E3, U+2122, U+2139, U+2194-2199, U+21A9-21AA, U+231A, U+231B, U+2328, U+23CF, U+23E9-23F3, U+23F8-23FA, U+24C2, U+25AA, U+25AB, U+25B6, U+25C0, U+25FB-25FE, U+2600-27EF, U+2934, U+2935, U+2B00-2BFF, U+3030, U+303D, U+3297, U+3299, U+1F000-1F02F, U+1F0A0-1F0FF, U+1F100-1F64F, U+1F680-1F6FF, U+1F910-1F96B, U+1F980-1F9E0
unicode-range: U+0080-02AF, U+0300-03FF, U+0600-06FF, U+0C00-0C7F, U+1DC0-1DFF, U+1E00-1EFF, U+2000-209F, U+20D0-214F, U+2190-23FF, U+2460-25FF, U+2600-27EF, U+2900-29FF, U+2B00-2BFF, U+2C60-2C7F, U+2E00-2E7F, U+3000-303F, U+A490-A4CF, U+E000-F8FF, U+FE00-FE0F, U+FE30-FE4F, U+1F000-1F02F, U+1F0A0-1F0FF, U+1F100-1F64F, U+1F680-1F6FF, U+1F910-1F96B, U+1F980-1F9E0;
Emoji ranges are updated for every new version of Unicode Emoji. Ranges below are correct for version 14.0
Here is my gist for an advanced version of this code.
def is_contains_emoji(p_string_in_unicode):
    """
    Report whether a text contains at least one character in the emoji ranges.

    Instead of searching all chars of a text in an emoji lookup dictionary, this
    function just checks whether any char falls into a few code point ranges.
    It is much faster than a dictionary lookup for a large text.
    However it only tells whether a text contains an emoji; it does not return
    the emojis found.

    :param p_string_in_unicode: text to inspect; None or "" yields False
    :return: True if any character is in one of the ranges below, else False
    """
    if not p_string_in_unicode:
        return False

    # Inclusive (first, last) code point ranges.
    emoji_ranges = (
        (0x1F300, 0x1FAF6),  # 127744..129782, main pictograph blocks
        (0x1F004, 0x1F251),  # 126980..127569, tiles, cards, enclosed characters
        # 8205..12953. Known limitation: this span is deliberately coarse and
        # also covers non-emoji symbols such as U+2013 EN DASH.
        (0x200D, 0x3299),
    )
    # Only (c) and (R) are emoji between U+00A9 and U+00AE; the characters in
    # between (feminine ordinal, left guillemet, not sign, soft hyphen) are
    # ordinary text and must not be reported.
    emoji_singletons = (0x00A9, 0x00AE)

    for a_char in p_string_in_unicode:
        char_code = ord(a_char)
        if char_code in emoji_singletons:
            return True
        if any(first <= char_code <= last for first, last in emoji_ranges):
            return True
    return False
You can get ranges of characters meeting any requirements specified by their category and properties from the Official UnicodeSet Utility
According to their search result, the full range of emoji is:
[\U0001F3FB-\U0001F3FF * # \U0001F600 \U0001F603 \U0001F604 \U0001F601 \U0001F606 \U0001F605 \U0001F923 \U0001F602 \U0001F642 \U0001F643 \U0001FAE0 \U0001F609 \U0001F60A \U0001F607 \U0001F970 \U0001F60D \U0001F929 \U0001F618 \U0001F617 \u263A \U0001F61A \U0001F619 \U0001F972 \U0001F60B \U0001F61B \U0001F61C \U0001F92A \U0001F61D \U0001F911 \U0001F917 \U0001F92D \U0001FAE2 \U0001FAE3 \U0001F92B \U0001F914 \U0001FAE1 \U0001F910 \U0001F928 \U0001F610 \U0001F611 \U0001F636 \U0001FAE5 \U0001F60F \U0001F612 \U0001F644 \U0001F62C \U0001F925 \U0001FAE8 \U0001F60C \U0001F614 \U0001F62A \U0001F924 \U0001F634 \U0001F637 \U0001F912 \U0001F915 \U0001F922 \U0001F92E \U0001F927 \U0001F975 \U0001F976 \U0001F974 \U0001F635 \U0001F92F \U0001F920 \U0001F973 \U0001F978 \U0001F60E \U0001F913 \U0001F9D0 \U0001F615 \U0001FAE4 \U0001F61F \U0001F641 \u2639 \U0001F62E \U0001F62F \U0001F632 \U0001F633 \U0001F97A \U0001F979 \U0001F626-\U0001F628 \U0001F630 \U0001F625 \U0001F622 \U0001F62D \U0001F631 \U0001F616 \U0001F623 \U0001F61E \U0001F613 \U0001F629 \U0001F62B \U0001F971 \U0001F624 \U0001F621 \U0001F620 \U0001F92C \U0001F608 \U0001F47F \U0001F480 \u2620 \U0001F4A9 \U0001F921 \U0001F479-\U0001F47B \U0001F47D \U0001F47E \U0001F916 \U0001F63A \U0001F638 \U0001F639 \U0001F63B-\U0001F63D \U0001F640 \U0001F63F \U0001F63E \U0001F648-\U0001F64A \U0001F48B \U0001F48C \U0001F498 \U0001F49D \U0001F496 \U0001F497 \U0001F493 \U0001F49E \U0001F495 \U0001F49F \u2763 \U0001F494 \u2764 \U0001F9E1 \U0001F49B \U0001F49A \U0001F499 \U0001F49C \U0001FA75-\U0001FA77 \U0001F90E \U0001F5A4 \U0001F90D \U0001F4AF \U0001F4A2 \U0001F4A5 \U0001F4AB \U0001F4A6 \U0001F4A8 \U0001F573 \U0001F4A3 \U0001F4AC \U0001F5E8 \U0001F5EF \U0001F4AD \U0001F4A4 \U0001F44B \U0001F91A \U0001F590 \u270B \U0001F596 \U0001FAF1-\U0001FAF4 \U0001F44C \U0001F90C \U0001F90F \u270C \U0001F91E \U0001FAF0 \U0001F91F \U0001F918 \U0001F919 \U0001F448 \U0001F449 \U0001F446 \U0001F595 \U0001F447 \u261D \U0001FAF5 \U0001F44D \U0001F44E \u270A 
\U0001F44A \U0001F91B \U0001F91C \U0001F44F \U0001F64C \U0001FAF6 \U0001F450 \U0001F932 \U0001F91D \U0001F64F \U0001FAF7 \U0001FAF8 \u270D \U0001F485 \U0001F933 \U0001F4AA \U0001F9BE \U0001F9BF \U0001F9B5 \U0001F9B6 \U0001F442 \U0001F9BB \U0001F443 \U0001F9E0 \U0001FAC0 \U0001FAC1 \U0001F9B7 \U0001F9B4 \U0001F440 \U0001F441 \U0001F445 \U0001F444 \U0001FAE6 \U0001F476 \U0001F9D2 \U0001F466 \U0001F467 \U0001F9D1\U0001F471 \U0001F468\U0001F9D4 \U0001F469 \U0001F9D3 \U0001F474 \U0001F475 \U0001F64D \U0001F64E \U0001F645 \U0001F646 \U0001F481 \U0001F64B \U0001F9CF \U0001F647 \U0001F926 \U0001F937 \U0001F46E \U0001F575 \U0001F482 \U0001F977 \U0001F477 \U0001FAC5 \U0001F934 \U0001F478 \U0001F473 \U0001F472 \U0001F9D5 \U0001F935 \U0001F470 \U0001F930 \U0001FAC3 \U0001FAC4 \U0001F931 \U0001F47C \U0001F385 \U0001F936 \U0001F9B8 \U0001F9B9 \U0001F9D9-\U0001F9DF \U0001F9CC \U0001F486 \U0001F487 \U0001F6B6 \U0001F9CD \U0001F9CE \U0001F3C3 \U0001F483 \U0001F57A \U0001F574 \U0001F46F \U0001F9D6 \U0001F9D7 \U0001F93A \U0001F3C7 \u26F7 \U0001F3C2 \U0001F3CC \U0001F3C4 \U0001F6A3 \U0001F3CA \u26F9 \U0001F3CB \U0001F6B4 \U0001F6B5 \U0001F938 \U0001F93C-\U0001F93E \U0001F939 \U0001F9D8 \U0001F6C0 \U0001F6CC \U0001F46D \U0001F46B \U0001F46C \U0001F48F \U0001F491 \U0001F46A \U0001F5E3 \U0001F464 \U0001F465 \U0001FAC2 \U0001F463 \U0001F9B0 \U0001F9B1 \U0001F9B3 \U0001F9B2 \U0001F435 \U0001F412 \U0001F98D \U0001F9A7 \U0001F436 \U0001F415 \U0001F9AE \U0001F429 \U0001F43A \U0001F98A \U0001F99D \U0001F431 \U0001F408 \U0001F981 \U0001F42F \U0001F405 \U0001F406 \U0001F434 \U0001FACE \U0001FACF \U0001F40E \U0001F984 \U0001F993 \U0001F98C \U0001F9AC \U0001F42E \U0001F402-\U0001F404 \U0001F437 \U0001F416 \U0001F417 \U0001F43D \U0001F40F \U0001F411 \U0001F410 \U0001F42A \U0001F42B \U0001F999 \U0001F992 \U0001F418 \U0001F9A3 \U0001F98F \U0001F99B \U0001F42D \U0001F401 \U0001F400 \U0001F439 \U0001F430 \U0001F407 \U0001F43F \U0001F9AB \U0001F994 \U0001F987 \U0001F43B \U0001F428 \U0001F43C \U0001F9A5 
\U0001F9A6 \U0001F9A8 \U0001F998 \U0001F9A1 \U0001F43E \U0001F983 \U0001F414 \U0001F413 \U0001F423-\U0001F427 \U0001F54A \U0001F985 \U0001F986 \U0001F9A2 \U0001F989 \U0001F9A4 \U0001FAB6 \U0001F9A9 \U0001F99A \U0001F99C \U0001FABD \U0001FABF \U0001F438 \U0001F40A \U0001F422 \U0001F98E \U0001F40D \U0001F432 \U0001F409 \U0001F995 \U0001F996 \U0001F433 \U0001F40B \U0001F42C \U0001F9AD \U0001F41F-\U0001F421 \U0001F988 \U0001F419 \U0001F41A \U0001FAB8 \U0001FABC \U0001F40C \U0001F98B \U0001F41B-\U0001F41D \U0001FAB2 \U0001F41E \U0001F997 \U0001FAB3 \U0001F577 \U0001F578 \U0001F982 \U0001F99F \U0001FAB0 \U0001FAB1 \U0001F9A0 \U0001F490 \U0001F338 \U0001F4AE \U0001FAB7 \U0001F3F5 \U0001F339 \U0001F940 \U0001F33A-\U0001F33C \U0001F337 \U0001FABB \U0001F331 \U0001FAB4 \U0001F332-\U0001F335 \U0001F33E \U0001F33F \u2618 \U0001F340-\U0001F343 \U0001FAB9 \U0001FABA \U0001F347-\U0001F34D \U0001F96D \U0001F34E-\U0001F353 \U0001FAD0 \U0001F95D \U0001F345 \U0001FAD2 \U0001F965 \U0001F951 \U0001F346 \U0001F954 \U0001F955 \U0001F33D \U0001F336 \U0001FAD1 \U0001F952 \U0001F96C \U0001F966 \U0001F9C4 \U0001F9C5 \U0001F344 \U0001F95C \U0001FAD8 \U0001F330 \U0001FADA \U0001FADB \U0001F35E \U0001F950 \U0001F956 \U0001FAD3 \U0001F968 \U0001F96F \U0001F95E \U0001F9C7 \U0001F9C0 \U0001F356 \U0001F357 \U0001F969 \U0001F953 \U0001F354 \U0001F35F \U0001F355 \U0001F32D \U0001F96A \U0001F32E \U0001F32F \U0001FAD4 \U0001F959 \U0001F9C6 \U0001F95A \U0001F373 \U0001F958 \U0001F372 \U0001FAD5 \U0001F963 \U0001F957 \U0001F37F \U0001F9C8 \U0001F9C2 \U0001F96B \U0001F371 \U0001F358-\U0001F35D \U0001F360 \U0001F362-\U0001F365 \U0001F96E \U0001F361 \U0001F95F-\U0001F961 \U0001F980 \U0001F99E \U0001F990 \U0001F991 \U0001F9AA \U0001F366-\U0001F36A \U0001F382 \U0001F370 \U0001F9C1 \U0001F967 \U0001F36B-\U0001F36F \U0001F37C \U0001F95B \u2615 \U0001FAD6 \U0001F375 \U0001F376 \U0001F37E \U0001F377-\U0001F37B \U0001F942 \U0001F943 \U0001FAD7 \U0001F964 \U0001F9CB \U0001F9C3 \U0001F9C9 \U0001F9CA \U0001F962 
\U0001F37D \U0001F374 \U0001F944 \U0001F52A \U0001FAD9 \U0001F3FA \U0001F30D-\U0001F310 \U0001F5FA \U0001F5FE \U0001F9ED \U0001F3D4 \u26F0 \U0001F30B \U0001F5FB \U0001F3D5 \U0001F3D6 \U0001F3DC-\U0001F3DF \U0001F3DB \U0001F3D7 \U0001F9F1 \U0001FAA8 \U0001FAB5 \U0001F6D6 \U0001F3D8 \U0001F3DA \U0001F3E0-\U0001F3E6 \U0001F3E8-\U0001F3ED \U0001F3EF \U0001F3F0 \U0001F492 \U0001F5FC \U0001F5FD \u26EA \U0001F54C \U0001F6D5 \U0001F54D \u26E9 \U0001F54B \u26F2 \u26FA \U0001F301 \U0001F303 \U0001F3D9 \U0001F304-\U0001F307 \U0001F309 \u2668 \U0001F3A0 \U0001F6DD \U0001F3A1 \U0001F3A2 \U0001F488 \U0001F3AA \U0001F682-\U0001F68A \U0001F69D \U0001F69E \U0001F68B-\U0001F68E \U0001F690-\U0001F699 \U0001F6FB \U0001F69A-\U0001F69C \U0001F3CE \U0001F3CD \U0001F6F5 \U0001F9BD \U0001F9BC \U0001F6FA \U0001F6B2 \U0001F6F4 \U0001F6F9 \U0001F6FC \U0001F68F \U0001F6E3 \U0001F6E4 \U0001F6E2 \u26FD \U0001F6DE \U0001F6A8 \U0001F6A5 \U0001F6A6 \U0001F6D1 \U0001F6A7 \u2693 \U0001F6DF \u26F5 \U0001F6F6 \U0001F6A4 \U0001F6F3 \u26F4 \U0001F6E5 \U0001F6A2 \u2708 \U0001F6E9 \U0001F6EB \U0001F6EC \U0001FA82 \U0001F4BA \U0001F681 \U0001F69F-\U0001F6A1 \U0001F6F0 \U0001F680 \U0001F6F8 \U0001F6CE \U0001F9F3 \u231B \u23F3 \u231A \u23F0-\u23F2 \U0001F570 \U0001F55B \U0001F567 \U0001F550 \U0001F55C \U0001F551 \U0001F55D \U0001F552 \U0001F55E \U0001F553 \U0001F55F \U0001F554 \U0001F560 \U0001F555 \U0001F561 \U0001F556 \U0001F562 \U0001F557 \U0001F563 \U0001F558 \U0001F564 \U0001F559 \U0001F565 \U0001F55A \U0001F566 \U0001F311-\U0001F31C \U0001F321 \u2600 \U0001F31D \U0001F31E \U0001FA90 \u2B50 \U0001F31F \U0001F320 \U0001F30C \u2601 \u26C5 \u26C8 \U0001F324-\U0001F32C \U0001F300 \U0001F308 \U0001F302 \u2602 \u2614 \u26F1 \u26A1 \u2744 \u2603 \u26C4 \u2604 \U0001F525 \U0001F4A7 \U0001F30A \U0001F383 \U0001F384 \U0001F386 \U0001F387 \U0001F9E8 \u2728 \U0001F388-\U0001F38B \U0001F38D-\U0001F391 \U0001F9E7 \U0001F380 \U0001F381 \U0001F397 \U0001F39F \U0001F3AB \U0001F396 \U0001F3C6 \U0001F3C5 
\U0001F947-\U0001F949 \u26BD \u26BE \U0001F94E \U0001F3C0 \U0001F3D0 \U0001F3C8 \U0001F3C9 \U0001F3BE \U0001F94F \U0001F3B3 \U0001F3CF \U0001F3D1 \U0001F3D2 \U0001F94D \U0001F3D3 \U0001F3F8 \U0001F94A \U0001F94B \U0001F945 \u26F3 \u26F8 \U0001F3A3 \U0001F93F \U0001F3BD \U0001F3BF \U0001F6F7 \U0001F94C \U0001F3AF \U0001FA80 \U0001FA81 \U0001F3B1 \U0001F52E \U0001FA84 \U0001F9FF \U0001FAAC \U0001F3AE \U0001F579 \U0001F3B0 \U0001F3B2 \U0001F9E9 \U0001F9F8 \U0001FA85 \U0001FAA9 \U0001FA86 \u2660 \u2665 \u2666 \u2663 \u265F \U0001F0CF \U0001F004 \U0001F3B4 \U0001F3AD \U0001F5BC \U0001F3A8 \U0001F9F5 \U0001FAA1 \U0001F9F6 \U0001FAA2 \U0001F453 \U0001F576 \U0001F97D \U0001F97C \U0001F9BA \U0001F454-\U0001F456 \U0001F9E3-\U0001F9E6 \U0001F457 \U0001F458 \U0001F97B \U0001FA71-\U0001FA73 \U0001F459 \U0001F45A \U0001FAAD \U0001FAAE \U0001F45B-\U0001F45D \U0001F6CD \U0001F392 \U0001FA74 \U0001F45E \U0001F45F \U0001F97E \U0001F97F \U0001F460 \U0001F461 \U0001FA70 \U0001F462 \U0001F451 \U0001F452 \U0001F3A9 \U0001F393 \U0001F9E2 \U0001FA96 \u26D1 \U0001F4FF \U0001F484 \U0001F48D \U0001F48E \U0001F507-\U0001F50A \U0001F4E2 \U0001F4E3 \U0001F4EF \U0001F514 \U0001F515 \U0001F3BC \U0001F3B5 \U0001F3B6 \U0001F399-\U0001F39B \U0001F3A4 \U0001F3A7 \U0001F4FB \U0001F3B7 \U0001FA97 \U0001F3B8-\U0001F3BB \U0001FA95 \U0001F941 \U0001FA98 \U0001FA87 \U0001FA88 \U0001F4F1 \U0001F4F2 \u260E \U0001F4DE-\U0001F4E0 \U0001F50B \U0001FAAB \U0001F50C \U0001F4BB \U0001F5A5 \U0001F5A8 \u2328 \U0001F5B1 \U0001F5B2 \U0001F4BD-\U0001F4C0 \U0001F9EE \U0001F3A5 \U0001F39E \U0001F4FD \U0001F3AC \U0001F4FA \U0001F4F7-\U0001F4F9 \U0001F4FC \U0001F50D \U0001F50E \U0001F56F \U0001F4A1 \U0001F526 \U0001F3EE \U0001FA94 \U0001F4D4-\U0001F4DA \U0001F4D3 \U0001F4D2 \U0001F4C3 \U0001F4DC \U0001F4C4 \U0001F4F0 \U0001F5DE \U0001F4D1 \U0001F516 \U0001F3F7 \U0001F4B0 \U0001FA99 \U0001F4B4-\U0001F4B8 \U0001F4B3 \U0001F9FE \U0001F4B9 \u2709 \U0001F4E7-\U0001F4E9 \U0001F4E4-\U0001F4E6 \U0001F4EB \U0001F4EA 
\U0001F4EC-\U0001F4EE \U0001F5F3 \u270F \u2712 \U0001F58B \U0001F58A \U0001F58C \U0001F58D \U0001F4DD \U0001F4BC \U0001F4C1 \U0001F4C2 \U0001F5C2 \U0001F4C5 \U0001F4C6 \U0001F5D2 \U0001F5D3 \U0001F4C7-\U0001F4CE \U0001F587 \U0001F4CF \U0001F4D0 \u2702 \U0001F5C3 \U0001F5C4 \U0001F5D1 \U0001F512 \U0001F513 \U0001F50F-\U0001F511 \U0001F5DD \U0001F528 \U0001FA93 \u26CF \u2692 \U0001F6E0 \U0001F5E1 \u2694 \U0001F52B \U0001FA83 \U0001F3F9 \U0001F6E1 \U0001FA9A \U0001F527 \U0001FA9B \U0001F529 \u2699 \U0001F5DC \u2696 \U0001F9AF \U0001F517 \u26D3 \U0001FA9D \U0001F9F0 \U0001F9F2 \U0001FA9C \u2697 \U0001F9EA-\U0001F9EC \U0001F52C \U0001F52D \U0001F4E1 \U0001F489 \U0001FA78 \U0001F48A \U0001FA79 \U0001FA7C \U0001FA7A \U0001FA7B \U0001F6AA \U0001F6D7 \U0001FA9E \U0001FA9F \U0001F6CF \U0001F6CB \U0001FA91 \U0001F6BD \U0001FAA0 \U0001F6BF \U0001F6C1 \U0001FAA4 \U0001FA92 \U0001F9F4 \U0001F9F7 \U0001F9F9-\U0001F9FB \U0001FAA3 \U0001F9FC \U0001FAE7 \U0001FAA5 \U0001F9FD \U0001F9EF \U0001F6D2 \U0001F6AC \u26B0 \U0001FAA6 \u26B1 \U0001F5FF \U0001FAA7 \U0001FAAA \U0001F3E7 \U0001F6AE \U0001F6B0 \u267F \U0001F6B9-\U0001F6BC \U0001F6BE \U0001F6C2-\U0001F6C5 \u26A0 \U0001F6B8 \u26D4 \U0001F6AB \U0001F6B3 \U0001F6AD \U0001F6AF \U0001F6B1 \U0001F6B7 \U0001F4F5 \U0001F51E \u2622 \u2623 \u2B06 \u2197 \u27A1 \u2198 \u2B07 \u2199 \u2B05 \u2196 \u2195 \u2194 \u21A9 \u21AA \u2934 \u2935 \U0001F503 \U0001F504 \U0001F519-\U0001F51D \U0001F6D0 \u269B \U0001F549 \u2721 \u2638 \u262F \u271D \u2626 \u262A \u262E \U0001F54E \U0001F52F \U0001FAAF \u2648-\u2653 \u26CE \U0001F500-\U0001F502 \u25B6 \u23E9 \u23ED \u23EF \u25C0 \u23EA \u23EE \U0001F53C \u23EB \U0001F53D \u23EC \u23F8-\u23FA \u23CF \U0001F3A6 \U0001F505 \U0001F506 \U0001F4F6 \U0001F4F3 \U0001F4F4 \U0001F6DC \u2640 \u2642 \u26A7 \u2716 \u2795-\u2797 \U0001F7F0 \u267E \u203C \u2049 \u2753-\u2755 \u2757 \u3030 \U0001F4B1 \U0001F4B2 \u2695 \u267B \u269C \U0001F531 \U0001F4DB \U0001F530 \u2B55 \u2705 \u2611 \u2714 \u274C \u274E \u27B0 \u27BF 
\u303D \u2733 \u2734 \u2747 \u00A9 \u00AE \u2122 \U0001F51F-\U0001F524 \U0001F170 \U0001F18E \U0001F171 \U0001F191-\U0001F193 \u2139 \U0001F194 \u24C2 \U0001F195 \U0001F196 \U0001F17E \U0001F197 \U0001F17F \U0001F198-\U0001F19A \U0001F201 \U0001F202 \U0001F237 \U0001F236 \U0001F22F \U0001F250 \U0001F239 \U0001F21A \U0001F232 \U0001F251 \U0001F238 \U0001F234 \U0001F233 \u3297 \u3299 \U0001F23A \U0001F235 \U0001F534 \U0001F7E0-\U0001F7E2 \U0001F535 \U0001F7E3 \U0001F7E4 \u26AB \u26AA \U0001F7E5 \U0001F7E7-\U0001F7E9 \U0001F7E6 \U0001F7EA \U0001F7EB \u2B1B \u2B1C \u25FC \u25FB \u25FE \u25FD \u25AA \u25AB \U0001F536-\U0001F53B \U0001F4A0 \U0001F518 \U0001F533 \U0001F532 \U0001F3C1 \U0001F6A9 \U0001F38C \U0001F3F4 \U0001F3F3 \U0001F1E6-\U0001F1FF 0-9]
Triple click to select whole line
You can choose to exclude basic latin characters[#*0-9] in your program.
If you only deal with English characters and emoji characters, I think it is doable. First convert your string to UTF-16 code units; then every code unit whose value is bigger than 0xD800 (for emoji it is actually >= 0xD83C) should belong to an emoji.
This is because "The Unicode standard permanently reserves the code point values between 0xD800 to 0xDFFF for UTF-16 encoding of the high and low surrogates", and of course English characters (and many other characters) won't fall in this range.
But because emoji code points start from U+1F300, their UTF-16 values actually fall in this range.
Check here for a quick reference of emoji UTF-16 values, if you would rather not work them out yourself.

Scala - Exception in thread "main" java.lang.NumberFormatException

I have seen other threads about this but none of them answered my question. The fact is I think that in my case the problem is not like the others.
Here is a link to the problem I tried to solve: http://projecteuler.net/problem=55
I am sure the algorithm is correct but what I suspect is that there is something going on with what Scala can convert to string. I don't know. Here is my code:
package projecteuler55
// Project Euler 55: counts the numbers in [10, 10000) for which the
// reverse-and-add process does not reach a palindrome within the step limit.
object PE55 extends App {
// Returns x with its decimal digits reversed.
// Note: the reversed digit string is parsed with toInt, i.e. as a 32-bit Int,
// and only then converted to the BigInt result type.
def rev ( x :BigInt) : BigInt = {
x.toString.reverse.toInt
}
// True when x differs from its own digit reversal.
def notPalindrome ( x:BigInt) : Boolean = {
if ( x != rev(x) ) true else false
}
// Returns true once more than 50 steps have passed without x being a
// palindrome, false as soon as x is a palindrome; otherwise continues with
// x + rev(x) and an incremented step count.
def test (x:BigInt , steps:Int) : Boolean = {
if ( steps > 50 ) true
else if (notPalindrome(x) == false) false
else test ( x + rev(x) , steps +1)
}
// Number of candidates for which test(...) returned true.
var lychrel = 0
for (i<-10 until 10000){
// The first reverse-and-add is applied before handing the value to test.
if (test(i+rev(i),0)) lychrel += 1
}
println(lychrel)
}
and the error I get is this:
Exception in thread "main" java.lang.NumberFormatException: For input string: "2284457131"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Integer.parseInt(Integer.java:495)
at java.lang.Integer.parseInt(Integer.java:527)
at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:229)
at scala.collection.immutable.StringOps.toInt(StringOps.scala:31)
at projecteuler55.PE55$.rev(PE55.scala:8)
at projecteuler55.PE55$.notPalindrome(PE55.scala:14)
at projecteuler55.PE55$.test(PE55.scala:20)
at projecteuler55.PE55$$anonfun$1.apply$mcVI$sp(PE55.scala:26)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
at projecteuler55.PE55$delayedInit$body.apply(PE55.scala:25)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:71)
at scala.App$$anonfun$main$1.apply(App.scala:71)
at scala.collection.immutable.List.foreach(List.scala:318)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
at scala.App$class.main(App.scala:71)
at projecteuler55.PE55$.main(PE55.scala:3)
at projecteuler55.PE55.main(PE55.scala)
Why does this happen? I also use BigInt in case of an overflow but this does not seem to help.
Thanks in advance
toInt expects a string that can be parsed to fit into a regular Int, which 2284457131 can't. You want to use BigInt(x.toString.reverse).