Is anyone interested in porting SunPinyin to ibus?
To build it on Solaris with SunStudio,
$ export CC=/opt/SUNWspro/bin/CC $ export LDFLAGS="-lCrun -lCstd"
$ export CC=/opt/SUNWspro/bin/CC $ export LDFLAGS="-lCrun -lCstd"
#!/usr/bin/python import xmlrpclib blogid = 'yongsun' user = 'yong.sun@sun.com' passwd = '**********' host = 'http://blogs.sun.com' server = xmlrpclib.ServerProxy(host+'/roller-services/xmlrpc', use_datetime=True) num = 1000 ... ... ... ... Please note that passwd is the "Weblog Client API Password". It is probably the same as your original login password; in any case, you can set it on your profile page.
class DATrie (object):
... ...
def find_base (self, s, children, i=1):
    """Search for a base offset under which every child of state ``s`` fits.

    ``s`` is the index of the state being placed, ``children`` an iterable
    of its transition characters and ``i`` the first candidate offset.
    State 0 always gets base 0, and a state without children gets its own
    index back.  Otherwise candidates are tried upwards from ``i`` until,
    for every child character, the slot ``i + encode_character(ch)`` has a
    falsy ``base`` entry, a falsy ``check`` entry and is not ``s`` itself.
    """
    if s == 0:
        return 0
    if not children:
        return s

    def blocked (slot):
        # A slot is unusable when it already carries a base or check
        # value, or when it would coincide with the parent state.
        return self.base[slot] or self.check[slot] or slot == s

    # any() stops at the first blocked child, after which the next
    # candidate offset is tried.
    while any (blocked (i + self.encode_character (ch)) for ch in children):
        i += 1
    return i
... ...
def construct_from_trie (self, trie, with_value=True):
    """Fill ``base``/``check`` (and optionally ``value``) from ``trie``.

    The trie is walked breadth-first starting at ``trie.root``, which is
    placed at index 0.  For each node a base offset is obtained from
    ``find_base``; it is stored negated when the node's ``val`` is truthy.
    Each child is placed at ``abs(base[s]) + encode_character(ch)`` and
    its ``check`` entry is set to the parent index.

    ``with_value`` controls whether ``trienode.val`` is copied into
    ``self.value``.  An empty trie (root without transitions) is handled
    without error.
    """
    from collections import deque

    # FIFO queue of (trie node, index assigned to it in the arrays).
    pending = deque ([(trie.root, 0)])
    while pending:
        trienode, s = pending.popleft ()
        b = self.find_base (s, trienode.trans)
        self.base[s] = -b if trienode.val else b
        if with_value:
            self.value[s] = trienode.val
        for ch in trienode.trans:
            c = self.encode_character (ch)
            t = abs (self.base[s]) + c
            # Children of the root (index 0) get -1 for now, so that their
            # slots do not look free (check == 0) to later find_base calls.
            self.check[t] = s if s else -1
            pending.append ((trienode.trans[ch], t))

    # Nothing to clean up when the root has no children; max() of an
    # empty mapping would raise ValueError.
    if not trie.root.trans:
        return

    # Turn the temporary -1 markers of the root's children back into 0.
    # NOTE(review): the scanned range assumes encode_character() grows
    # with character order, so that max() yields the largest code.
    for i in range (self.encode_character (max (trie.root.trans)) + 1):
        if self.check[i] == -1:
            self.check[i] = 0
For constructing larger dictionaries, I'd recommend using darts-clone, dastrie (only for a static trie), or libdatrie (only supports 16-bit value data).
import gettext
# The gettext module uses the encoding specified in the Content-Type header
# of GNU mo files and converts the message strings to unicode.  Here you can
# specify a different *output* encoding.  bind_textdomain_codeset() is gone
# from Python 3.11 on, so the call is skipped when it is not available.
if hasattr(gettext, 'bind_textdomain_codeset'):
    gettext.bind_textdomain_codeset('gedit', codeset='UTF-8')

# The gettext module tries to retrieve messages from /usr/share/locale by
# default; otherwise you need to set the directory explicitly.
gettext.bindtextdomain('gedit', '/usr/share/locale')


def _(msg):
    """Return the translation of ``msg`` from the 'gedit' domain."""
    return gettext.dgettext('gedit', msg)


def N_(msg):
    """Return ``msg`` unchanged (no-op marker for translatable strings)."""
    return msg


# Parenthesised single-argument form works as both the Python 2 print
# statement and the Python 3 print() function.
print(_("Save"))
#!/usr/bin/python
# copyright (c) 2008 Feng Zhu, Yong Sun
import heapq
from functools import partial
from numpy import *
from scipy.linalg import *
from scipy.cluster.vq import *
import pylab
def line_samples ():
    """Return 120 sample points lying on three horizontal lines.

    The x coordinates are random values scaled by 3; the y coordinate is
    1, 2 and 3 for the first, second and third group of 40 points.
    """
    points = random.rand (120, 2)
    points[:, 0] = points[:, 0] * 3
    # Rows 0-39 sit on y=1, rows 40-79 on y=2, rows 80-119 on y=3.
    for group, height in enumerate ((1, 2, 3)):
        points[40 * group:40 * (group + 1), 1] = height
    return points
def gaussian_simfunc (v1, v2, sigma=1):
    """Gaussian similarity exp(-||v1 - v2||^2 / (2 * sigma^2)) of two vectors."""
    distance = norm (v1 - v2)
    return exp (-(distance ** 2) / (2 * (sigma ** 2)))
def construct_W (vecs, simfunc=gaussian_simfunc):
    """Build the symmetric n-by-n similarity matrix of ``vecs``.

    ``simfunc(a, b)`` is evaluated once for every pair ``i <= j``
    (including the diagonal) and the result is stored in both ``W[i, j]``
    and ``W[j, i]``.  Returns the matrix; an empty ``vecs`` gives a 0x0
    matrix.
    """
    n = len (vecs)
    W = zeros ((n, n))
    # range() instead of the Python-2-only xrange(): same iteration on
    # either interpreter.
    for i in range (n):
        for j in range (i, n):
            W[i, j] = W[j, i] = simfunc (vecs[i], vecs[j])
    return W
def knn (W, k, mutual=False):
    """Sparsify the similarity matrix ``W`` in place to a k-nearest-neighbour graph.

    For every row only the entries that are at least as large as the
    ``k+1``-th largest value of that row are kept (presumably ``k+1``
    because each row also contains the point's self-similarity -- confirm
    against how ``W`` is built).  A pair ``(i, j)`` that is kept by neither
    row becomes 0.  A pair kept by only one of the two rows is kept when
    ``mutual`` is false and zeroed when ``mutual`` is true.

    ``W`` must be square and symmetric with positive entries: the sign of
    an entry is used as a temporary "dropped" marker.  Requires
    ``0 < k < n - 1``.  Returns ``None``; ``W`` is modified in place.
    """
    n = W.shape[0]
    assert k > 0 and k < (n - 1)

    # Pass 1: per row, flip the sign of every entry below the threshold.
    for i in range (n):
        thr = heapq.nlargest (k + 1, W[i])[-1]
        for j in range (n):
            if W[i, j] < thr:
                W[i, j] = -W[i, j]

    # Pass 2: combine the marks of the two directions of each pair.
    for i in range (n):
        for j in range (i, n):
            pair = W[i, j] + W[j, i]
            if pair < 0:
                # Dropped from both sides.
                W[i, j] = W[j, i] = 0
            elif pair == 0:
                # Dropped from exactly one side (W is symmetric, so the
                # two values cancel).
                W[i, j] = W[j, i] = 0 if mutual else abs (W[i, j])
# Sample data and its k-nearest-neighbour similarity graph.
vecs = line_samples ()
W = construct_W (vecs, simfunc=partial (gaussian_simfunc, sigma=2))
knn (W, 10)

# Degree matrix (row sums of W) and graph Laplacian.  These were fused on
# one line in the original, which is a syntax error.
D = diag (W.sum (axis=1))
L = D - W

# Generalised eigenproblem L v = lambda D v.
evals, evcts = eig (L, D)

# Order the eigenpairs by eigenvalue.  Sorting column indices (instead of
# using the eigenvalues as dict keys) keeps repeated eigenvalues and does
# not depend on Python 2's list-returning dict.keys().
order = sorted (range (len (evals)), key=lambda col: evals[col].real)

# Rows of Y are the points embedded by the 3 smallest eigenvectors.
Y = array ([evcts[:, col] for col in order[:3]]).transpose ()

res, idx = kmeans2 (Y, 3, minit='points')
colors = [(1, 2, 3)[i] for i in idx]
pylab.scatter (vecs[:, 0], vecs[:, 1], c=colors)
pylab.show ()
$pydoc scipy.cluster.vq.kmeans (or kmeans2). However, the initially selected centers affect the performance a lot. Thanks to Feng Zhu, who introduced k-means++ to us; it is a very good and effective way to select the initial centers. But judging from the authors' C++ implementation, the processing (Utils.cpp:chooseSmartCenters()) seems a little different from the description in the paper. It looks like we only need to minimize
$\sum_{x \in X} \min\left(D(x)^2, \lVert x - x_i \rVert^2\right)$.
def kinit (X, k):
    """Choose ``k`` initial centers from the rows of ``X`` in the spirit of k-means++.

    The first center is a uniformly random row of ``X``.  Every further
    center is the row ``x_i`` that minimises
    ``sum_{x in X} min(D(x)^2, ||x - x_i||^2)``, where ``D(x)`` is the
    distance from ``x`` to its closest center chosen so far.  On ties the
    row with the smallest index wins.

    Returns an array with one center per row.  At least one center is
    always returned, even for ``k < 1``.
    """
    # Imported explicitly rather than relying on a star import to leak it.
    from numpy.random import randint

    n = X.shape[0]

    # Choose the 1st seed randomly, and store D(x)^2 in D[].
    centers = [X[randint (n)]]
    D = [norm (x - centers[0]) ** 2 for x in X]

    for _ in range (k - 1):
        bestDsum = bestIdx = -1
        for i in range (n):
            # Dsum = sum_{x in X} min(D(x)^2, ||x - xi||^2).  Accumulated
            # in a plain loop: builtin reduce() does not exist on Python 3
            # and `sum` may be shadowed by numpy's star import.
            Dsum = 0
            for j in range (n):
                Dsum += min (D[j], norm (X[j] - X[i]) ** 2)
            if bestDsum < 0 or Dsum < bestDsum:
                bestDsum, bestIdx = Dsum, i
        centers.append (X[bestIdx])
        # Refresh D(x)^2 with the newly added center.
        D = [min (D[i], norm (X[i] - X[bestIdx]) ** 2) for i in range (n)]
    return array (centers)
# To use kinit() with kmeans2(): pass the seeds as the ``k`` argument and
# select minit='matrix', which tells kmeans2 to interpret ``k`` as the
# array of initial centroids.
res, idx = kmeans2 (Y, kinit (Y, 3), minit='matrix')
from functools import wraps
def interface (fn):
    """Decorator marking ``fn`` as an interface method that must be overridden.

    The decorated method is replaced by a stub that keeps ``fn``'s name
    and docstring and raises ``NotImplementedError`` (a subclass of
    ``Exception``) whenever it is called.
    """
    # wraps() must be given the wrapped function.  A bare ``@wraps`` turns
    # the stub into a functools.partial, which is not bound as a method
    # and fails with an unrelated TypeError when called.
    @wraps (fn)
    def to_be_implemented (*args, **kwargs):
        raise NotImplementedError (
            "Interface '%s' is not implemented!" % fn.__name__)
    return to_be_implemented
class Foo (object):
    """Base class whose ``test`` method is declared through ``@interface``."""

    @interface
    def test (self):
        pass


class Bar (Foo):
    """Subclass that provides its own ``test``."""

    def test (self):
        pass


class Qux (Foo):
    """Subclass that does not override ``test``."""


# Bar overrides test(); Qux inherits the decorated Foo.test.
Bar ().test ()
Qux ().test ()

This blog copyright 2009 by yongsun