Changeset 1660:592058a16c4d
- Timestamp: 06/13/08 17:13:29 (2 months ago)
- Branch: default
- Location: src/grid/hadoop
- Files: 2 modified
  - benchmarks/digg/topic-correlation-reduce (modified) (2 diffs)
  - bin/createvm (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
src/grid/hadoop/benchmarks/digg/topic-correlation-reduce
r1659 r1660 1 1 #! /usr/bin/python 2 2 import sys 3 from math import sqrt 4 5 topic_desc = ['offbeat_news','gadgets','videos_comedy','world_news','tech_news','apple','health','videos_music','playable_web_games','security','environment','space','linux_unix','videos_educational','videos_people','politics','xbox_360','music','videos_animation','hardware','business_finance','2008_us_elections','programming','design','software','general_sciences','gaming_news','other_sports','movies','pc_games','mods','political_opinion','baseball','nintendo_wii','extreme_sports','microsoft','celebrity','playstation_3','television','tennis','videos_gaming','football','tech_deals','videos_sports','motorsport','basketball','hockey','golf','soccer','comics_animation','odd_stuff','educational','xbox','comedy','people','nintendo','playstation','pets_animals','travel_places','arts_culture','food_drink','autos'] 6 topic_map = {} 7 8 for i in range(0, len(topic_desc)): 9 topic_map["%d" % (i+1)] = topic_desc[i] 10 topic_map['0'] = topic_map['1'] 3 11 4 12 def add_value(key, value, map): … … 7 15 map[key] += value 8 16 9 users = 0 10 key_map = {} 17 def corr_sort(x, y): 18 if x['corr'] < y['corr']: 19 return -1 20 elif x['corr'] > y['corr']: 21 return 1 22 return 0 23 24 sums = {} 25 topics = {} 11 26 for line in sys.stdin: 12 users += 113 27 line = line.strip() 14 28 cols = line.split('\t') 15 i = 1 16 topic_map = {} 17 while i < len(cols): 18 topic = cols[i] 19 prob = float(cols[i+1]) 20 #print "%s_mean\t%.20f" % (topic, prob) 21 add_value("%s_mean" % topic, prob, key_map) 22 topic_map[topic] = prob 23 #print "%s_square\t%.20f" % (topic, prob**2) 24 add_value("%s_square" % topic, prob**2, key_map) 25 i += 2 26 for topic, val in topic_map.iteritems(): 27 for topic2, val2 in topic_map.iteritems(): 28 if topic != topic2: 29 t1 = long(topic) 30 t2 = long(topic2) 31 t_1 = min(t1,t2) 32 t_2 = max(t1,t2) 33 #print "%d_%d_cross\t%.20f" % (t_1,t_2,val*val2) 34 add_value("%d_%d_cross" % 
(t_1,t_2),val*val2, key_map) 35 print "%s\t%d" % ("users",users) 36 for key, value in key_map.iteritems(): 37 print "%s\t%.20f" % (key, value) 38 #print >>sys.stderr,"%s\t%.20f" % (key, value) 29 if cols[0].endswith("_mean"): 30 topics[cols[0].replace('_mean','')] = 0 31 add_value(cols[0], float(cols[1]), sums) 32 topic_names = topics.keys() 33 topic_names.sort(lambda x,y: int(x)-int(y)) 34 corr_list = [] 35 n = float(sums['users']) 36 for topic1 in topic_names: 37 for topic2 in topic_names: 38 if topic1 == topic2: 39 continue 40 if not sums.has_key(topic1 + "_" + topic2 + "_cross"): 41 continue 42 sum1 = sums[topic1 + "_mean"] 43 sum2 = sums[topic2 + "_mean"] 44 sum12 = sums[topic1 + "_" + topic2 + "_cross"] 45 sqr1 = sums[topic1 + "_sqr"] 46 sqr2 = sums[topic2 + "_sqr"] 47 corr = (n * sum12 - sum1 * sum2)/(sqrt(n * sqr1-sum1**2)*sqrt(n*sqr2-sum2**2)) 48 corr_list.append({'t1':topic1,'t2':topic2,'corr': corr}) 49 corr_list.sort(corr_sort) 50 for corr in corr_list: 51 print "%.5f %s %s %s %s" % (corr['corr'],corr['t1'],topic_map[corr['t1']],corr['t2'],topic_map[corr['t2']]) -
src/grid/hadoop/bin/createvm
r1650 r1660 32 32 MEMORY=`cat ${HADOOP_ROOT}/hadoop.conf | grep memory_limit | awk '{print $2}'` 33 33 DISK=`cat ${HADOOP_ROOT}/hadoop.conf | grep disk_limit | awk '{print $2}'` 34 CPU=`cat ${HADOOP_ROOT}/hadoop.conf | grep cpu_limit | awk '{print $2}'` 34 35 MEM=`${HADOOP_INSTALL}/bin/mult ${MEMORY} 1e-6` 35 36 DIS=`${HADOOP_INSTALL}/bin/mult ${DISK} 1e-9` 36 PREFS="CPU:${CPUWEIGHT},0.01,1.0 disk:${DISKWEIGHT},${DIS}GB,${DIS}GB memory:${MEMWEIGHT},${MEM}MB,${MEM}MB" 37 CPU=`${HADOOP_INSTALL}/bin/mult ${CPU} 1e-6` 38 39 PREFS="CPU:${CPUWEIGHT},${CPU}MHz,3.0GHz disk:${DISKWEIGHT},${DIS}GB,${DIS}GB memory:${MEMWEIGHT},${MEM}MB,${MEM}MB" 37 40 38 41 MASTER=`cat ${HADOOP_ROOT}/.master 2>/dev/null`
