Implementing Hadoop operations in Java
Basic operations
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

@RunWith(JUnit4.class)
public class HadoopClientTest {

    private FileSystem fileSystem = null;

    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication", "1");
        configuration.set("dfs.blocksize", "64m");
        fileSystem = FileSystem.get(new URI("hdfs://hd-even-01:9000"), configuration, "root");
    }

    /**
     * Copy a local file to Hadoop.
     *
     * @throws URISyntaxException
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void copyFileFromLocal() throws URISyntaxException, IOException, InterruptedException {
        // Upload the file
        fileSystem.copyFromLocalFile(new Path("C:\\Users\\Administrator\\Desktop\\win10激活.txt"), new Path("/even1"));
        // Close the client. On Windows this may report a winutils error, because the Linux
        // tarball is being used; to run on Windows you need a compiled winutils binary.
        fileSystem.close();
    }

    /**
     * Download a file from Hadoop to the local machine. Downloading requires a configured
     * Hadoop environment, with winutils added to the bin directory on Windows.
     *
     * @throws URISyntaxException
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void copyFileToLocal() throws URISyntaxException, IOException, InterruptedException {
        // Download the file
        fileSystem.copyToLocalFile(new Path("/win10激活.txt"), new Path("E:/"));
        fileSystem.close();
    }

    /**
     * Create a directory.
     *
     * @throws IOException
     */
    @Test
    public void hdfsMkdir() throws IOException {
        // Create the directory
        fileSystem.mkdirs(new Path("/even1"));
        // Close the client
        fileSystem.close();
    }

    /**
     * Move a file / rename a file.
     */
    public void hdfsRename() throws IOException {
        fileSystem.rename(new Path(""), new Path(""));
        fileSystem.close();
    }

    /**
     * Delete a file or directory.
     *
     * @throws IOException
     */
    @Test
    public void hdfsRm() throws IOException {
        // fileSystem.delete(new Path(""));
        // The second argument enables recursive deletion
        fileSystem.delete(new Path(""), true);
        fileSystem.close();
    }

    /**
     * List information about a given HDFS directory.
     *
     * @throws IOException
     */
    @Test
    public void hdfsLs() throws IOException {
        // Returns a remote iterator; the second argument also lists files inside subdirectories
        RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus locatedFileStatus = listFiles.next();
            System.out.println("File path: " + locatedFileStatus.getPath());
            System.out.println("Block size: " + locatedFileStatus.getBlockSize());
            System.out.println("File length: " + locatedFileStatus.getLen());
            System.out.println("Replication: " + locatedFileStatus.getReplication());
            System.out.println("Block locations: " + Arrays.toString(locatedFileStatus.getBlockLocations()));
        }
        fileSystem.close();
    }

    /**
     * Tell files and directories apart.
     */
    @Test
    public void findHdfs() throws IOException {
        // 1. Get the status of the root directory entries
        FileStatus[] listStatus = fileSystem.listStatus(new Path("/"));
        // 2. Iterate over all of them
        for (FileStatus fileStatus : listStatus) {
            if (fileStatus.isFile())
                System.out.println("Is a file: " + fileStatus.getPath().getName());
            else if (fileStatus.isDirectory())
                System.out.println("Is a directory: " + fileStatus.getPath().getName());
        }
        fileSystem.close();
    }
}
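Each test above closes the FileSystem by hand at the end. Since FileSystem implements java.io.Closeable, the same pattern can be written with try-with-resources instead, which also closes the client when an operation throws. A minimal sketch, assuming the same hd-even-01 cluster and root user as the tests above; the class name HadoopMkdirExample is just for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HadoopMkdirExample {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // try-with-resources closes the client even if mkdirs throws
        try (FileSystem fileSystem = FileSystem.get(
                new URI("hdfs://hd-even-01:9000"), configuration, "root")) {
            // exists() lets us skip the call when the directory is already there
            Path dir = new Path("/even1");
            if (!fileSystem.exists(dir)) {
                fileSystem.mkdirs(dir);
            }
        }
    }
}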
File reading and writing
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

@RunWith(JUnit4.class)
public class HadoopReadWriteTest {

    FileSystem fileSystem = null;
    Configuration configuration = null;

    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // 1. Load the configuration
        configuration = new Configuration();
        // 2. Build the client
        fileSystem = FileSystem.get(new URI("hdfs://hd-even-01:9000/"), configuration, "root");
    }

    @Test
    public void testReadData() throws IOException {
        // 1. Open an HDFS input stream
        FSDataInputStream open = fileSystem.open(new Path("/win10激活.txt"));
        // 2. Buffer size for each read
        byte[] bytes = new byte[1024];
        // 3. Read the data; only print the bytes actually read in this pass
        int len;
        while ((len = open.read(bytes)) != -1)
            System.out.println(Arrays.toString(Arrays.copyOf(bytes, len)));
        open.close();
        fileSystem.close();
    }

    /**
     * Use a buffered stream.
     *
     * @throws IOException
     */
    @Test
    public void testReadData1() throws IOException {
        FSDataInputStream open = fileSystem.open(new Path("/win10激活.txt"));
        // A buffered stream is a bit faster
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(open, StandardCharsets.UTF_8));
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            System.out.println(line);
        }
        bufferedReader.close();
        open.close();
        fileSystem.close();
    }

    /**
     * Read only part of the content by specifying an offset.
     */
    @Test
    public void readSomeData() throws IOException {
        FSDataInputStream open = fileSystem.open(new Path("/win10激活.txt"));
        // Seek to the starting offset
        open.seek(14);
        // Read 5 bytes at a time
        byte[] bytes = new byte[5];
        int len;
        while ((len = open.read(bytes)) != -1)
            System.out.println(new String(bytes, 0, len));
        open.close();
        fileSystem.close();
    }

    /**
     * Write data via streams.
     *
     * @throws IOException
     */
    @Test
    public void writeData() throws IOException {
        // 1. Get the output stream (false: do not overwrite an existing file)
        FSDataOutputStream out = fileSystem.create(new Path("/win11.txt"), false);
        // 2. Open an input stream on the local file to be written
        FileInputStream in = new FileInputStream(new File("C:\\Users\\Administrator\\Desktop\\xixi.txt"));
        byte[] b = new byte[1024];
        int read;
        while ((read = in.read(b)) != -1) {
            out.write(b, 0, read);
        }
        in.close();
        out.close();
        fileSystem.close();
    }

    /**
     * Write a string directly.
     */
    @Test
    public void writeData1() throws IOException {
        // 1. Create the output stream
        FSDataOutputStream out = fileSystem.create(new Path("/aibaobao.txt"), false);
        // 2. Write the data
        out.write("wochaoaibaobao".getBytes());
        // 3. Close the stream
        IOUtils.closeStream(out);
        fileSystem.close();
    }

    /**
     * Upload with IOUtils.
     *
     * @throws IOException
     */
    @Test
    public void putToHdfs() throws IOException {
        // 1. Get the local input stream
        FileInputStream in = new FileInputStream(new File("C:\\Users\\Administrator\\Desktop\\xixi.txt"));
        // 2. Get the HDFS output stream
        FSDataOutputStream out = fileSystem.create(new Path("/haddopPut.txt"), false);
        // 3. Copy
        IOUtils.copyBytes(in, out, configuration);
        // 4. Close the streams
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
        fileSystem.close();
    }

    /**
     * Download with IOUtils.
     *
     * @throws IOException
     */
    @Test
    public void getFromHdfs() throws IOException {
        // 1. Get the HDFS input stream
        FSDataInputStream open = fileSystem.open(new Path("/haddopPut.txt"));
        // 2. Get the local output stream
        FileOutputStream out = new FileOutputStream(new File("C:\\Users\\Administrator\\Desktop\\haddopPut.txt"));
        // 3. Copy
        IOUtils.copyBytes(open, out, configuration);
        // 4. Close the streams
        IOUtils.closeStream(open);
        IOUtils.closeStream(out);
        fileSystem.close();
    }
}
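The streaming examples follow the same open/copy/close pattern, so they also condense well into try-with-resources: FSDataInputStream, FSDataOutputStream, and FileSystem are all Closeable. A minimal sketch of the IOUtils-based upload written that way, assuming the same cluster; the local file name local.txt and the class name HadoopPutExample are made up for the example:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.FileInputStream;
import java.net.URI;

public class HadoopPutExample {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // All three resources are closed automatically, in reverse order of opening
        try (FileSystem fileSystem = FileSystem.get(
                     new URI("hdfs://hd-even-01:9000/"), configuration, "root");
             FileInputStream in = new FileInputStream("local.txt");
             FSDataOutputStream out = fileSystem.create(new Path("/local.txt"), false)) {
            // Copy with a 4 KB buffer; false means copyBytes does not close the
            // streams itself, since try-with-resources already takes care of that
            IOUtils.copyBytes(in, out, 4096, false);
        }
    }
}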