1+ using System ;
2+ using System . Collections . Generic ;
3+ using System . IO ;
4+ using System . Linq ;
5+ using System . Reflection ;
6+ using CedictParserLib ;
7+ using JiebaNet . Segmenter ;
8+ using static QuickInfo . NodeFactory ;
9+
10+ namespace QuickInfo ;
11+
/// <summary>
/// Processor that recognizes input consisting solely of Chinese characters,
/// segments it into words and returns one card per word. When the word is
/// found in the CC-CEDICT data file, the card lists its pinyin and definitions.
/// </summary>
public class Chinese : IProcessor
{
    /// <summary>
    /// Inputs longer than this many UTF-16 code units are not processed.
    /// </summary>
    private const int MaxInputLength = 1024;

    // Private gate for the lazy dictionary load. A dedicated object is used
    // instead of `this` so that code outside this class can never contend
    // on (or deadlock with) the same monitor.
    private readonly object dictionaryLock = new object();

    // Created on first use.
    private JiebaSegmenter segmenter;
    private JiebaSegmenter Segmenter => segmenter ??= new JiebaSegmenter();

    // Only ever assigned a fully populated dictionary, and never mutated after
    // being assigned, so readers can use it without taking the lock.
    private volatile Dictionary<string, CedictEntry> dictionary;

    /// <summary>
    /// Lookup from a simplified or traditional headword to its dictionary entry.
    /// Loaded from disk on first access. If loading throws, nothing is cached
    /// and the next access tries again.
    /// </summary>
    private Dictionary<string, CedictEntry> Dictionary
    {
        get
        {
            var loaded = dictionary;
            if (loaded != null)
            {
                return loaded;
            }

            lock (dictionaryLock)
            {
                loaded = dictionary;
                if (loaded == null)
                {
                    // Build into a local and publish only when complete, so no
                    // other thread can observe a half-filled dictionary.
                    loaded = LoadDictionary();
                    dictionary = loaded;
                }

                return loaded;
            }
        }
    }

    /// <summary>
    /// Reads "Resources/cedict_ts.u8" located next to the executing assembly and
    /// indexes every entry by its simplified and traditional forms.
    /// When several entries share a headword, the last one read wins.
    /// </summary>
    private static Dictionary<string, CedictEntry> LoadDictionary()
    {
        // Assembly.Location can be an empty string (for example when the assembly
        // is not loaded from a file on disk); Path.GetDirectoryName would then
        // return null and Path.Combine would throw. Fall back to the app base directory.
        var assemblyLocation = Assembly.GetExecutingAssembly().Location;
        var baseDirectory = string.IsNullOrEmpty(assemblyLocation)
            ? AppContext.BaseDirectory
            : Path.GetDirectoryName(assemblyLocation);

        var parser = new CedictParser(
            Path.Combine(baseDirectory, "Resources", "cedict_ts.u8"));

        var entries = new Dictionary<string, CedictEntry>();

        foreach (var entry in parser.ReadToEnd())
        {
            if (entry.Simplified != null)
            {
                entries[entry.Simplified] = entry;
            }

            if (entry.Traditional != null && entry.Traditional != entry.Simplified)
            {
                entries[entry.Traditional] = entry;
            }
        }

        return entries;
    }

    /// <summary>
    /// Produces the result for <paramref name="query"/>.
    /// </summary>
    /// <param name="query">The query to process.</param>
    /// <returns>
    /// The help table when <c>query.IsHelp</c> is set; <c>null</c> when the trimmed
    /// input is null, empty, longer than <see cref="MaxInputLength"/> or contains
    /// anything that is not a Chinese character; otherwise a list with one card
    /// node per segmented word.
    /// </returns>
    public object GetResult(Query query)
    {
        if (query.IsHelp)
        {
            return HelpTable(
                ("我的房子很大", "Chinese"));
        }

        string text = query.OriginalInputTrim;

        if (string.IsNullOrEmpty(text) || text.Length > MaxInputLength || !IsAllChinese(text))
        {
            return null;
        }

        var words = SplitFurther(RejoinSurrogatePairs(Segmenter.Cut(text))).ToArray();

        // Read the property once; the instance it returns never changes after load.
        var lookup = Dictionary;

        var list = new List<object>();

        foreach (var word in words)
        {
            var details = new List<object>();
            if (lookup.TryGetValue(word, out var entry))
            {
                if (entry.Pinyin != null)
                {
                    details.Add(Accent(entry.Pinyin));
                }

                if (entry.Definitions != null && entry.Definitions.Length > 0)
                {
                    details.Add(string.Join("; ", entry.Definitions));
                }
            }

            var card = new Node
            {
                Text = word,
                Style = NodeStyles.Card
            };
            card[NodeStyles.HeaderStyle] = "charSample";

            // Words without a dictionary entry still get a card, just without details.
            if (details.Count > 0)
            {
                card.List = details;
            }

            list.Add(card);
        }

        return list;
    }

    /// <summary>
    /// Breaks a multi-character word that is not in the dictionary into its
    /// individual characters, provided every one of those characters is itself
    /// in the dictionary. All other words are passed through unchanged.
    /// </summary>
    private IEnumerable<string> SplitFurther(IEnumerable<string> words)
    {
        var lookup = Dictionary;

        foreach (var word in words)
        {
            // Split by code point so an ideograph encoded as a surrogate pair
            // is treated as one character and never cut in half.
            var characters = SplitIntoCharacters(word);

            if (characters.Count == 1)
            {
                yield return word;
            }
            else if (!lookup.ContainsKey(word) && characters.All(character => lookup.ContainsKey(character)))
            {
                foreach (var character in characters)
                {
                    yield return character;
                }
            }
            else
            {
                yield return word;
            }
        }
    }

    /// <summary>
    /// Splits <paramref name="word"/> into one string per Unicode code point
    /// (a surrogate pair becomes a single two-unit string).
    /// </summary>
    private static List<string> SplitIntoCharacters(string word)
    {
        var characters = new List<string>(word.Length);

        for (int i = 0; i < word.Length; i++)
        {
            if (char.IsSurrogatePair(word, i))
            {
                characters.Add(word.Substring(i, 2));
                i++; // skip the low surrogate that was just consumed
            }
            else
            {
                characters.Add(word[i].ToString());
            }
        }

        return characters;
    }

    /// <summary>
    /// Glues a token that is a lone high surrogate to an immediately following
    /// token that is a lone low surrogate; every other token is passed through.
    /// </summary>
    /// <remarks>
    /// Defensive step so a card never shows half a character.
    /// NOTE(review): the segmenter appears to emit text outside its own Han
    /// pattern one UTF-16 unit at a time — confirm. If it keeps pairs intact
    /// this method is a no-op.
    /// </remarks>
    private static IEnumerable<string> RejoinSurrogatePairs(IEnumerable<string> words)
    {
        string pendingHigh = null;

        foreach (var word in words)
        {
            if (pendingHigh != null)
            {
                if (word.Length == 1 && char.IsLowSurrogate(word[0]))
                {
                    yield return pendingHigh + word;
                    pendingHigh = null;
                    continue;
                }

                // Not followed by its low half: emit it as it was received.
                yield return pendingHigh;
                pendingHigh = null;
            }

            if (word.Length == 1 && char.IsHighSurrogate(word[0]))
            {
                pendingHigh = word;
            }
            else
            {
                yield return word;
            }
        }

        if (pendingHigh != null)
        {
            yield return pendingHigh;
        }
    }

    /// <summary>
    /// Returns true when every code point of <paramref name="text"/> is a Chinese
    /// character. Unpaired surrogates are not Chinese characters.
    /// </summary>
    private static bool IsAllChinese(string text)
    {
        for (int i = 0; i < text.Length; i++)
        {
            int codePoint;
            if (char.IsSurrogatePair(text, i))
            {
                codePoint = char.ConvertToUtf32(text, i);
                i++; // skip the low surrogate that was just consumed
            }
            else
            {
                codePoint = text[i];
            }

            if (!IsChineseCodePoint(codePoint))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns true when the UTF-16 code unit <paramref name="c"/> lies in one of
    /// the CJK blocks recognized by this class. A single <see cref="char"/> cannot
    /// hold a supplementary-plane ideograph (Extension B and beyond), so those can
    /// never match through this overload.
    /// </summary>
    public static bool IsChinese(char c)
    {
        return IsChineseCodePoint(c);
    }

    /// <summary>
    /// Returns true when the Unicode code point lies in one of the CJK blocks
    /// recognized by this class.
    /// </summary>
    private static bool IsChineseCodePoint(int code)
    {
        // CJK Unified Ideographs (Common Chinese characters)
        if (code >= 0x4E00 && code <= 0x9FFF)
        {
            return true;
        }

        // CJK Unified Ideographs Extension A
        if (code >= 0x3400 && code <= 0x4DBF)
        {
            return true;
        }

        // CJK Unified Ideographs Extension B - F (supplementary plane,
        // encoded as surrogate pairs in UTF-16)
        if (code >= 0x20000 && code <= 0x2EBEF)
        {
            return true;
        }

        // CJK Compatibility Ideographs
        if (code >= 0xF900 && code <= 0xFAFF)
        {
            return true;
        }

        // CJK Radicals Supplement
        if (code >= 0x2E80 && code <= 0x2EFF)
        {
            return true;
        }

        // Kangxi Radicals
        if (code >= 0x2F00 && code <= 0x2FDF)
        {
            return true;
        }

        return false;
    }
}
0 commit comments