资源简介
在Hadoop集群中,用MapReduce分布式计算TFIDF
代码片段和文件信息
package eb.cloud.mapreduce.MR.guoruonan;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
public class Tfidf {
public static class Mapper0 extends Mapper {
String filename;
public void map(LongWritable key Text value Context context)
throws IOException InterruptedException {
FileSplit split = (FileSplit) context.getInputSplit();
filename = split.getPath().getName();
String newString = value.toString().toLowerCase();
String results[] = newString.split(“[^a-zA-Z]“);
int flag = 0;
for (String val : results) {
if (val.equals(““))
continue;
context.write(new Text(filename) new Text(val));
}
}
}
public static class Reducer0 extends Reducer {
public void reduce(Text key Iterable values Context context)
throws IOException InterruptedException {
ArrayList array = new ArrayList();
for (Text t : values) {
array.add(t.toString());
}
for (String str : array) {
context.write(new Text(key.toString() + “ “+str) new Text(““
+ array.size()));
//
}
}
}
public static class Mapper1 extends Mapper {
public void map(LongWritable key Text value Context context)
throws IOException InterruptedException {
String line = value.toString();
int index = line.indexOf(“ “);
context.write(new Text(line.substring(0 index))
new Text(line.substring(index + 1)));
}
}
public static class Reducer1 extends Reducer {
public void reduce(Text key Iterable values Context context)
throws IOException InterruptedException {
ArrayList array = new ArrayList();
int ciNum = 1;
for (Text val : values) {
array.add(val.toString());
ciNum =
- 上一篇:java汉诺塔动画实现
- 下一篇:jsr173_1.0_api.jar
相关资源
- 大数据hadoop,spark,hive等等面试汇总
- 《Hadoop with Python》 pdf
- 绝对可用hadoop-eclipse-plugin-1.1.2.jar
- 筛选文章热词
- hadoop-common-2.7.1-bin
- hadoop2.5.0 snappy编译jar包
- Hadoop 2.7.4 Windows 64位 编译bin包含win
- hadoop-hdfs-2.7.7.jar
- 修复版 hadoop-0.21.0-eclipse-plugin.jar
- hadoop-eclipse-plugin-1.2.1.jar
- Native Hadoop3.2.1 Library 64位编译
- hadoop-common-2.7.3.jar
- avro-1.8.1
- TFIDF算法java实现
- Hadoop2.7.7配合使用的winutil
- hadoop 32位lib包
- hadoop电商网站分析系统毕业设计项目
- hadoop-eclipse-plugin-2.7.5.jar275778
- Hadoop-Eclipse插件2.7.6
- JavaWeb操作hadoop2.6 HDFS从页面上传,,
- hadoop-eclipse-plugin-2.7.3.jar 插件
- hadoop-eclipse-plugin-2.7.2完美兼容版
- hadoop-eclipse-plugin-2.6.4.jar
- MapReduce分析年气象数据用源代码
- hadoop-eclipse-plugin-2.8.5.jar
- hadoop-eclipse-plugin-3.1.2.jar
- Hadoop的jar包
- hadoop-eclipse-plugin-2.9.2.jar 插件
- hadoop-eclipse-plugin-2.7.7.jar 插件
- hadoop-3.1.3.tar.gz
评论
共有 0 条评论