@diyer22
2016-11-28T13:53:24.000000Z
字数 2768
阅读 2296
未分类
from __future__ import unicode_literals
def log(d):
show_len = 40
if isinstance(d,list):
for i in d:
strr = str(i)
if len(strr) > show_len:
print strr[:show_len] + '...'
else:
print strr
return
if isinstance(d,dict):
for i in d:
strr = str(i)+' = '+str(d[i])
if len(strr) > show_len:
print strr[:show_len] + '...'
else:
print strr
return
print d
MAX_L = 22
def p_tree(t,level = 0):
if level > MAX_L:
return
for i in t:
if not isinstance(i,int):
continue
print level*'| '+'%d :%d'%(i, t[i]['c'])
p_tree(t[i],level+1)
min_sup = 0.01
min_conf = 0.5
path = './retail2.dat'
f = open(path, 'r')
db = f.readlines()
f.close()
db = map(lambda strr:map(lambda x:int(x), strr[:-1].split(' ')[:-1]), db)
min_sup *= len(db)
dic = {} # 记录it 频率
dicc = {} # 记录所有频繁项集
min_f = [] # 记录最小边缘频繁项集
a_r = [] # 记录边缘强关系
for items in db:
for item in items:
dic[item] = 1 if item not in dic else dic[item] + 1
# 删除小于 min_sup 的item
[dic.pop(k) if not dic[k]>= min_sup else None for k in dic.keys()]
root = {}
record = {}
def add_tree(its, tree, count, record, dic):
'''
将 count个 items 加入 tree
如果record_flag为True 则记录
tree 的结构
s self
f father
c count
[\d] child
'''
its = filter(lambda x: x in dic, its)
its.sort(lambda x, y: 1 if dic[x] < dic[y] else -1 )
for it in its:
if it not in tree:
tree[it] = {}
tree[it]['f'] = tree
tree[it]['s'] = it
record[it]=[tree[it]] if it not in record else record[it]+[tree[it]]
tree = tree[it]
tree['c'] = count if 'c' not in tree else tree['c'] + count
for items in db:
add_tree(items, root, 1, record, dic)
#print 'dic',
#log(dic)
print '\nrecord',log('')
#p_tree(root)
fp_l = dic.items()
fp_l.sort(lambda x, y: 1 if x[1]<y[1] else -1)
def creat_its(t):
l=[]
while 1:
t = t['f']
if 'f' not in t:
break
l += [t['s']]
return l[::-1]
dicc ={}
def is_singl_tree(t):
def c_n(n, listt, l, b, r): # 递归生成全排列
''' n 剩余选择数量
listt 全排列目标
b 起始值
l 临时记录
'''
if n == 0: # 生成关系并计算支持度
r.append(tuple(l))
return
for i in range(b, len(listt)-n+1):
l += [listt[i]]
c_n(n-1, listt, l, i+1,r)
l.pop()
return
def combination(its): # 对边缘频繁项集 递归生成排列
n = len(its)
l = []
if n == 1:
return [its]
for lenth in range(1, n+1):
c_n(lenth, its, [],0,l)
print 'its',its,l
return l
l = []
root = t
while 1:
tmp = filter(lambda x:isinstance(x, int), t.keys())
n = len(tmp)
if n != 1 and n != 0:
# print l,p_tree(root)
return None
if n == 0 :
if 'f' in t:
# print 'tmp',l
# print tmp,root.keys(),l,combination(l)
return [list(i) for i in combination(l)]
return None
l += tmp
t=t[tmp[0]]
def header_to_db(fp_list, record):
l = []
# print 'fp',fp_list
for it in fp_list[::-1]:
it = it[0]
db = []
for t in record[it]:
db += [(creat_its(t), t['c'])]
# print 'db',db
dic = {}
for items in db:
for item in items[0]:
dic[item] = items[1] if item not in dic else dic[item] + items[1]
# 删除小于 min_sup 的item
[dic.pop(k) if not dic[k]>= min_sup else None for k in dic.keys()]
t = {'s':'root'}
header = {}
for its in db:
add_tree(its[0], t, its[1], header, dic)
r = is_singl_tree(t)
# print 'it',it,r,dic
if r != None: # 若已经是 single 了 则不必继续递归
# print 'r',r
l += [i+[it] for i in r]+[[it]]
# print 'add',[i+[it] for i in r]
continue
fp_l = dic.items()
fp_l.sort(lambda x, y: 1 if x[1]<y[1] else -1)
r = header_to_db(fp_l, header)
l += [i+[it] for i in r]+[[it]]
return l
#for it in fp_l[::-1]:
# f(it[0])
r = header_to_db(fp_l, record)
#print 'resoult',r
print 'frequent itemset', len(r)