Skip to content

Commit fd8bb9c

Browse files
committed
Add a simple Chinese translator
1 parent ee698ef commit fd8bb9c

File tree

4 files changed

+123556
-0
lines changed

4 files changed

+123556
-0
lines changed

src/Info/Program.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ static void Main(string[] args)
1111
var engine = new Engine
1212
(
1313
typeof(Ascii),
14+
typeof(Chinese),
1415
typeof(Color),
1516
typeof(QuickInfo.DateAndTime),
1617
typeof(Emoticons),
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Reflection;
6+
using CedictParserLib;
7+
using JiebaNet.Segmenter;
8+
using static QuickInfo.NodeFactory;
9+
10+
namespace QuickInfo;
11+
12+
/// <summary>
/// Processor for queries that consist solely of Chinese characters. The input is
/// segmented into words and each word is rendered as a card carrying its pinyin and
/// dictionary definitions when the word is present in the CC-CEDICT data file.
/// </summary>
public class Chinese : IProcessor
{
    /// <summary>Longest input, in UTF-16 code units, that is processed.</summary>
    private const int MaxInputLength = 1024;

    // Dedicated monitor. Locking on 'this' would let any code that holds a reference
    // to this instance take the same lock and stall or deadlock the dictionary load.
    private readonly object dictionaryLock = new object();

    private JiebaSegmenter segmenter;
    private JiebaSegmenter Segmenter => segmenter ??= new JiebaSegmenter();

    // volatile: the unlocked fast path in the Dictionary getter must never observe
    // the reference before the contents written under the lock.
    private volatile Dictionary<string, CedictEntry> dictionary;

    /// <summary>
    /// Lazily loaded lookup from a simplified or traditional headword to its entry.
    /// The map is built completely in a local variable and only then published, so
    /// concurrent callers see either null (and wait on the lock) or a complete map.
    /// If loading throws, nothing is cached and the next access retries.
    /// </summary>
    private Dictionary<string, CedictEntry> Dictionary
    {
        get
        {
            var loaded = dictionary;
            if (loaded != null)
            {
                return loaded;
            }

            lock (dictionaryLock)
            {
                loaded = dictionary;
                if (loaded == null)
                {
                    loaded = LoadDictionary();
                    dictionary = loaded;
                }
            }

            return loaded;
        }
    }

    /// <summary>
    /// Reads Resources/cedict_ts.u8 next to the executing assembly and indexes every
    /// entry by its simplified form and, when different, by its traditional form.
    /// When several entries share a headword the last one read wins.
    /// </summary>
    private static Dictionary<string, CedictEntry> LoadDictionary()
    {
        // Assembly.Location is an empty string for assemblies that were not loaded
        // from a file path (e.g. single-file deployments); fall back to the
        // application base directory instead of passing null to Path.Combine.
        var directory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        if (string.IsNullOrEmpty(directory))
        {
            directory = AppContext.BaseDirectory;
        }

        // NOTE(review): if CedictParser implements IDisposable it should be disposed
        // after ReadToEnd() - confirm against the CedictParser package.
        var parser = new CedictParser(Path.Combine(directory, "Resources", "cedict_ts.u8"));

        var entries = new Dictionary<string, CedictEntry>();
        foreach (var entry in parser.ReadToEnd())
        {
            if (entry.Simplified != null)
            {
                entries[entry.Simplified] = entry;
            }

            if (entry.Traditional != null && entry.Traditional != entry.Simplified)
            {
                entries[entry.Traditional] = entry;
            }
        }

        return entries;
    }

    /// <summary>
    /// Produces one card per segmented word of the query.
    /// </summary>
    /// <param name="query">The query to answer.</param>
    /// <returns>
    /// The help table for a help query; a list of card nodes when the trimmed input is
    /// non-empty, at most 1024 UTF-16 code units long and made up only of Chinese code
    /// points; otherwise null.
    /// </returns>
    public object GetResult(Query query)
    {
        if (query.IsHelp)
        {
            return HelpTable(
                ("我的房子很大", "Chinese"));
        }

        string text = query.OriginalInputTrim;

        if (string.IsNullOrEmpty(text) || text.Length > MaxInputLength)
        {
            return null;
        }

        if (!IsAllChinese(text))
        {
            return null;
        }

        var words = SplitFurther(JoinSurrogatePairs(Segmenter.Cut(text))).ToArray();

        var list = new List<object>();

        foreach (var word in words)
        {
            var details = new List<object>();
            if (Dictionary.TryGetValue(word, out var entry))
            {
                if (entry.Pinyin != null)
                {
                    details.Add(Accent(entry.Pinyin));
                }

                if (entry.Definitions != null && entry.Definitions.Length > 0)
                {
                    var definitions = string.Join("; ", entry.Definitions);
                    details.Add(definitions);
                }
            }

            var card = new Node
            {
                Text = word,
                Style = NodeStyles.Card
            };
            card[NodeStyles.HeaderStyle] = "charSample";

            // Words without a dictionary hit are still shown, just without details.
            if (details.Count > 0)
            {
                card.List = details;
            }

            list.Add(card);
        }

        return list;
    }

    /// <summary>
    /// Returns true when every Unicode code point of <paramref name="text"/> is Chinese.
    /// Surrogate pairs are decoded to one code point; a lone surrogate fails the check.
    /// </summary>
    private static bool IsAllChinese(string text)
    {
        for (int i = 0; i < text.Length; i++)
        {
            int codePoint;
            if (char.IsSurrogatePair(text, i))
            {
                codePoint = char.ConvertToUtf32(text, i);
                i++; // skip the low surrogate that was just consumed
            }
            else
            {
                codePoint = text[i];
            }

            if (!IsChinese(codePoint))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Rejoins a one-char high-surrogate token with the one-char low-surrogate token
    /// that directly follows it. All other tokens pass through unchanged.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the segmenter appears to emit text outside its Han pattern one
    /// UTF-16 char at a time, which would split supplementary-plane ideographs in two;
    /// confirm against jieba.NET. When it does not split them this method is a no-op.
    /// </remarks>
    private static IEnumerable<string> JoinSurrogatePairs(IEnumerable<string> words)
    {
        string pendingHigh = null;

        foreach (var word in words)
        {
            if (pendingHigh != null)
            {
                if (word.Length == 1 && char.IsLowSurrogate(word[0]))
                {
                    yield return pendingHigh + word;
                    pendingHigh = null;
                    continue;
                }

                // No matching low half followed: emit the token untouched.
                yield return pendingHigh;
                pendingHigh = null;
            }

            if (word.Length == 1 && char.IsHighSurrogate(word[0]))
            {
                pendingHigh = word;
            }
            else
            {
                yield return word;
            }
        }

        if (pendingHigh != null)
        {
            yield return pendingHigh;
        }
    }

    /// <summary>
    /// Breaks a multi-char word into single characters when the word itself is not in
    /// the dictionary but every one of its chars is; any other word is passed through.
    /// </summary>
    private IEnumerable<string> SplitFurther(IEnumerable<string> words)
    {
        foreach (var word in words)
        {
            if (word.Length == 1)
            {
                yield return word;
            }
            else if (!Dictionary.ContainsKey(word) && word.All(ch => Dictionary.ContainsKey(ch.ToString())))
            {
                foreach (var ch in word)
                {
                    yield return ch.ToString();
                }
            }
            else
            {
                yield return word;
            }
        }
    }

    /// <summary>
    /// Returns true when the UTF-16 code unit <paramref name="c"/> is a Chinese
    /// character. A single char cannot represent a supplementary-plane ideograph
    /// (Extension B-F); use <see cref="IsChinese(int)"/> with a full code point for those.
    /// </summary>
    public static bool IsChinese(char c)
    {
        return IsChinese((int)c);
    }

    /// <summary>
    /// Returns true when the Unicode code point falls in one of the CJK blocks
    /// recognised by this processor.
    /// </summary>
    /// <param name="codePoint">A Unicode code point (not a UTF-16 code unit).</param>
    public static bool IsChinese(int codePoint)
    {
        // CJK Unified Ideographs (Common Chinese characters)
        if (codePoint >= 0x4E00 && codePoint <= 0x9FFF)
        {
            return true;
        }

        // CJK Unified Ideographs Extension A
        if (codePoint >= 0x3400 && codePoint <= 0x4DBF)
        {
            return true;
        }

        // CJK Unified Ideographs Extension B - F (supplementary plane, encoded as surrogate pairs in UTF-16)
        if (codePoint >= 0x20000 && codePoint <= 0x2EBEF)
        {
            return true;
        }

        // CJK Compatibility Ideographs
        if (codePoint >= 0xF900 && codePoint <= 0xFAFF)
        {
            return true;
        }

        // CJK Radicals Supplement
        if (codePoint >= 0x2E80 && codePoint <= 0x2EFF)
        {
            return true;
        }

        // Kangxi Radicals
        if (codePoint >= 0x2F00 && codePoint <= 0x2FDF)
        {
            return true;
        }

        return false;
    }
}

src/QuickInfo/QuickInfo.csproj

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313
<PackageReference Include="UnicodeInformation" Version="2.7.1" />
1414
<PackageReference Include="ThisAssembly.AssemblyInfo" Version="1.2.15" />
1515
<PackageReference Include="GitInfo" Version="3.1.0" />
16+
<PackageReference Include="jieba.NET" Version="0.42.2" GeneratePathProperty="True" />
17+
<PackageReference Include="CedictParser" Version="1.0.0" GeneratePathProperty="True" />
18+
</ItemGroup>
19+
20+
<ItemGroup>
21+
<None Include="$(PkgJieba_Net)\Resources\*" CopyToOutputDirectory="PreserveNewest" Link="Resources\%(Filename)%(Extension)" />
22+
<None Include="Resources\cedict_ts.u8" CopyToOutputDirectory="PreserveNewest" Link="Resources\cedict_ts.u8" />
1623
</ItemGroup>
1724

1825
<PropertyGroup>

0 commit comments

Comments
 (0)