apriori算法
发布时间:2020-12-17 17:14:46 所属栏目:Python 来源:网络整理
导读:今天PHP站长网 52php.cn把收集自互联网的代码分享给大家,仅供参考。 """Description : Simple Python implementation of the Apriori Algorithm. Usage: $python apriori.py -f DATASET.csv -s minSupport -c minConfidence …
# The code below was collected from the internet by the PHP Webmaster Network
# (52php.cn) and is shared here for reference only.
"""
Description : Simple Python implementation of the Apriori Algorithm

Usage:
    $python apriori.py -f DATASET.csv -s minSupport -c minConfidence

    $python apriori.py -f DATASET.csv -s 0.15 -c 0.6
"""

import sys

from itertools import chain, combinations
from collections import defaultdict
from optparse import OptionParser


def subsets(arr):
    """Return an iterator over every non-empty subset of ``arr``.

    Each subset is yielded as a tuple, smallest subsets first.
    """
    pool = tuple(arr)
    return chain.from_iterable(
        combinations(pool, size) for size in range(1, len(pool) + 1))


def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
    """Return the members of ``itemSet`` whose support is >= ``minSupport``.

    Support of an itemset is the fraction of transactions in
    ``transactionList`` that contain it.

    Side effect: ``freqSet[item]`` is incremented once for every transaction
    containing ``item``, so the caller can look up absolute counts later.
    """
    localSet = defaultdict(int)
    for item in itemSet:
        for transaction in transactionList:
            if item.issubset(transaction):
                freqSet[item] += 1
                localSet[item] += 1

    # localSet is empty whenever transactionList is, so this never divides
    # by zero.
    total = len(transactionList)
    return {item for item, count in localSet.items()
            if float(count) / total >= minSupport}


def joinSet(itemSet, length):
    """Join ``itemSet`` with itself and return the ``length``-element unions."""
    joined = set()
    for first in itemSet:
        for second in itemSet:
            candidate = first.union(second)
            if len(candidate) == length:
                joined.add(candidate)
    return joined


def getItemSetTransactionList(data_iterator):
    """Materialise the input records.

    Returns a pair ``(itemSet, transactionList)`` where ``transactionList`` is
    a list with one frozenset per record and ``itemSet`` is the set of all
    1-itemsets (each a single-element frozenset) seen in the records.
    """
    transactionList = []
    itemSet = set()
    for record in data_iterator:
        transaction = frozenset(record)
        transactionList.append(transaction)
        for item in transaction:
            itemSet.add(frozenset([item]))  # Generate 1-itemSets
    return itemSet, transactionList


def runApriori(data_iter, minConfidence, minSupport=0.15):
    """Run the Apriori algorithm over ``data_iter``.

    Parameters:
        data_iter     -- iterable of records; each record is an iterable of
                         hashable items.
        minConfidence -- minimum confidence a rule needs to be returned.
        minSupport    -- minimum support an itemset needs to be considered
                         frequent. It is a trailing keyword with the same
                         default as the command line so that the existing
                         two-argument call form keeps working.

    Returns a pair ``(items, rules)``:
        items -- list of ``(tuple(itemset), support)``
        rules -- list of ``((tuple(antecedent), tuple(consequent)), confidence)``
    """
    itemSet, transactionList = getItemSetTransactionList(data_iter)

    freqSet = defaultdict(int)
    # Maps k -> set of frequent k-itemsets (those satisfying minSupport).
    largeSet = dict()

    currentLSet = returnItemsWithMinSupport(
        itemSet, transactionList, minSupport, freqSet)

    k = 2
    while currentLSet:
        largeSet[k - 1] = currentLSet
        candidates = joinSet(currentLSet, k)
        currentLSet = returnItemsWithMinSupport(
            candidates, transactionList, minSupport, freqSet)
        k += 1

    def getSupport(item):
        """Return the support of ``item`` from the recorded counts."""
        return float(freqSet[item]) / len(transactionList)

    toRetItems = []
    for size in sorted(largeSet):
        toRetItems.extend(
            (tuple(item), getSupport(item)) for item in largeSet[size])

    toRetRules = []
    for size in sorted(largeSet):
        if size < 2:
            # A rule needs a non-empty antecedent and consequent, so
            # 1-itemsets cannot produce one.
            continue
        for item in largeSet[size]:
            for element in map(frozenset, subsets(item)):
                remain = item.difference(element)
                if not remain:
                    continue
                confidence = getSupport(item) / getSupport(element)
                if confidence >= minConfidence:
                    toRetRules.append(
                        ((tuple(element), tuple(remain)), confidence))
    return toRetItems, toRetRules


def printResults(items, rules):
    """Print the generated itemsets followed by the confidence rules."""
    # print(<single string>) behaves the same on Python 2 and Python 3.
    for item, support in items:
        print("item: %s,%.3f" % (str(item), support))
    print("\n------------------------ RULES:")
    for rule, confidence in rules:
        pre, post = rule
        print("Rule: %s ==> %s,%.3f" % (str(pre), str(post), confidence))


def _recordsFromLines(lines):
    """Yield one frozenset of items per non-blank comma-separated line."""
    for line in lines:
        line = line.strip().rstrip(',')  # Remove trailing comma
        if not line:
            # A blank line would otherwise become a transaction holding the
            # bogus item ''.
            continue
        yield frozenset(line.split(','))


def dataFromFile(fname):
    """Read the CSV file ``fname`` and yield one frozenset record per line."""
    # The 'U' mode flag was removed in Python 3.11; strip() above already
    # discards any stray carriage returns.
    with open(fname, 'r') as file_iter:
        for record in _recordsFromLines(file_iter):
            yield record


def main():
    """Parse the command line, run Apriori and print the results."""
    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='filename containing csv',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minS',
                         help='minimum support value',
                         default=0.15,
                         type='float')
    optparser.add_option('-c', '--minConfidence',
                         dest='minC',
                         help='minimum confidence value',
                         default=0.6,
                         type='float')

    (options, args) = optparser.parse_args()

    if options.input is None:
        # Without -f the dataset is read from standard input and parsed the
        # same way as a file.
        inFile = _recordsFromLines(sys.stdin)
    else:
        inFile = dataFromFile(options.input)

    minSupport = options.minS
    minConfidence = options.minC

    items, rules = runApriori(inFile, minConfidence, minSupport=minSupport)

    printResults(items, rules)


if __name__ == "__main__":
    main()

# The content above was collected and compiled by the PHP Webmaster Network
# (52php.cn) for reference and study. If it helped you, feel free to bookmark,
# like, recommend and share it. (Editor: Li Datong)
# [Disclaimer] All content on this site comes from the internet; the opinions
# expressed are solely those of the authors and do not represent the position
# of this site. If your rights have been inadvertently infringed, please
# contact the webmaster promptly to have the content removed.