A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
以上每行的格式为"人:好友列表"。求出哪些人两两之间有共同好友,以及他俩的共同好友都是谁。例如:A-B:C,E;A-E:B,C,D
一种错误的理解是:只有当E在A的好友列表中、A也在E的好友列表中时,A和E同时拥有的好友才算共同好友
。如果按照这个观点思考下去就简单了,但是这个理解是错的,因为A-E:B,C,D 这种情况就不满足(E在A的好友列表中,但A并不在E的好友列表中)
正确的理解是:求的是任意两个人之间的共同好友,与这两个人是否互为好友、是否出现在彼此的好友列表中无关。
如果这个程序不用MapReduce做,那么应该是先把人全部切分出来,然后循环进行人与人的两两组合,组合之后将他们的好友列表合并,把其中出现两次的好友找出来,这些就是这两个人之间的共同好友,这也是人工去找共同好友的方法。
但是放在MapReduce中,每次只能读取一行数据,不能读到其他行的数据;如果要读到其他行的数据,就要找到一个key,然后把其他行的数据按这个key聚到一起,这样才能读到其他行。
如果已经知道答案,换成下面这个例子来想,就可以避免混淆了:
tom: apple,pear,banana,waterball
jerry:apple,pear
jack:banana,apple
哪些人两两之间有共同的水果,列举出两人所有的共同水果。这样大家都不会混淆了。但是工作中遇到的就是人和好友的问题,大胆地把它抽象成人和水果,也是工作中要做的事情。
下面链接是答案
package my.hadoop.hdfs.findFriend;import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser; public class FindCommonFriendOne { public static class FindFriendMapper extends Mapper{ // 泛型,定义输入输出的类型 /** * 友 人 */ Text text = new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // 将mptask传给我们的文本内容转换成String String line = value.toString(); IntWritable ONE = new IntWritable(1); // 根据空格切分 String[] qqAndFriend = line.split(":");//分割出QQ号 String qq = qqAndFriend[0]; String otherFriend = ""; StringBuffer friendbuf = new StringBuffer(qqAndFriend[1]+","); String[] friends = qqAndFriend[1].split(","); for (String friend : friends) { //查找其他朋友 //otherFriend = friendbuf.delete(friendbuf.indexOf(friend),friendbuf.indexOf(friend)+1).toString(); context.write(new Text(friend), new Text(qq)); } } } public static class FindFriendReducer extends Reducer { @Override protected void reduce(Text Keyin, Iterable values, Context context) throws IOException, InterruptedException { String qqs = ""; for (Text val : values) { qqs +=val.toString() + ","; } context.write(Keyin, new Text(qqs)); } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration configuration = new Configuration(); Job job = Job.getInstance(configuration); job.setJarByClass(FindCommonFriendOne.class); 
job.setMapperClass(FindFriendMapper.class); job.setReducerClass(FindFriendReducer.class); //指定最终输出的数据kv类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean res = job.waitForCompletion(true); System.exit(res ? 0 :1); }}
package my.hadoop.hdfs.findFriend;import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser; public class FindCommonFriendTwo { public static class FindFriendMapper extends Mapper{ // 泛型,定义输入输出的类型 /** * 友 人 */ Text text = new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // 将mptask传给我们的文本内容转换成String String line = value.toString(); IntWritable ONE = new IntWritable(1); // 根据空格切分 String[] friendAndQQ = line.split("\t");//分割出QQ号 String friend = friendAndQQ[0]; String otherFriend = ""; StringBuffer friendbuf = new StringBuffer(friendAndQQ[1] ); String[] qqs = friendAndQQ[1].split(","); for (int i=0;i < qqs.length;i++) { //查找其他朋友 for(int j = i+1;j 0) { context.write(new Text(qqs[i]+"-"+qqs[j]), new Text(friend)); } else{ context.write(new Text(qqs[j]+"-"+qqs[i]), new Text(friend)); } } } } } public static class FindFriendReducer extends Reducer { @Override protected void reduce(Text Keyin, Iterable values, Context context) throws IOException, InterruptedException { StringBuffer friends = new StringBuffer(); for (Text val : values) { if(friends.indexOf(val.toString())<0) { friends.append(val).append(","); } } context.write(Keyin, new Text(friends.toString())); } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration configuration = new Configuration(); Job job = 
Job.getInstance(configuration); job.setJarByClass(FindCommonFriendTwo.class); job.setMapperClass(FindFriendMapper.class); job.setReducerClass(FindFriendReducer.class); //指定最终输出的数据kv类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean res = job.waitForCompletion(true); System.exit(res ? 0 :1); }}
[hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes0/*
A I,K,C,B,G,F,H,O,D,
B A,F,J,E,
C A,E,B,H,F,G,K,
D G,C,K,A,L,F,E,H,
E G,M,L,H,A,F,B,D,
F L,M,D,C,G,A,
G M,
H O,
I O,C,
J O,
K B,
L D,E,
M E,F,
O A,H,I,J,F,
[hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes2/*
B-A E,C,
C-A F,D,
C-B A,
D-A E,F,
D-B A,E,
D-C F,A,
E-A D,C,B,
E-B C,
E-C D,
E-D L,
F-A C,O,D,E,B,
F-B C,A,E,
F-C A,D,
F-D E,A,
F-E C,B,M,D,
G-A E,D,C,F,
G-B E,A,C,
G-C D,F,A,
G-D A,E,F,
G-E D,C,
G-F C,A,E,D,
H-A O,E,C,D,
H-B E,C,A,
H-C D,A,
H-D E,A,
H-E C,D,
H-F C,D,A,E,O,
H-G C,A,E,D,
I-A O,
I-B A,
I-C A,
I-D A,
I-F A,O,
I-G A,
I-H A,O,
J-A B,O,
J-E B,
J-F O,B,
J-H O,
J-I O,
K-A D,C,
K-B A,C,
K-C D,A,
K-D A,
K-E C,D,
K-F D,C,A,
K-G D,C,A,
K-H C,D,A,
K-I A,
L-A E,D,F,
L-B E,
L-C D,F,
L-D F,E,
L-E D,
L-F D,E,
L-G E,F,D,
L-H E,D,
L-K D,
M-A F,E,
M-B E,
M-C F,
M-D F,E,
M-F E,
M-G E,
M-H E,
M-L E,F,
O-B A,
O-C I,A,
O-D A,
O-F A,
O-G A,
O-H A,
O-I A,
O-K A,