Initialization

commit 5cf3b30faa
dev_xulongjin committed 2025-04-11 09:50:09 +08:00
17 changed files with 620 additions and 0 deletions

6
.gitignore vendored Normal file

@@ -0,0 +1,6 @@
/**/target
/**/.idea
*.iml
*.class
**/*/dependency-reduced-pom.xml

25
hbase-lesson/pom.xml Normal file

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.vscoder</groupId>
    <artifactId>hbase-lesson</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.4.17</version>
        </dependency>
    </dependencies>
</project>


@@ -0,0 +1,7 @@
package cn.vscoder;

public class Main {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
}


@@ -0,0 +1,22 @@
log4j.rootCategory=ERROR,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Set the default spark-shell log level to ERROR. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=ERROR
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=ERROR
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=ERROR
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=ERROR
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR

184
spark-lesson/pom.xml Normal file

@@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>spark-lesson</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.12.2</scala.version>
    </properties>

    <repositories>
        <repository>
            <id>scala-tools.org</id>
            <name>Scala-Tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </repository>
    </repositories>

    <pluginRepositories>
        <pluginRepository>
            <id>scala-tools.org</id>
            <name>Scala-Tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </pluginRepository>
    </pluginRepositories>

    <dependencies>
        <!-- Scala standard library -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.12.8</version>
        </dependency>

        <!-- Spark component libraries ================================================================== -->
        <!-- Spark core -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.2.1</version>
        </dependency>
        <!-- Spark SQL -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>3.2.1</version>
        </dependency>
        <!-- Spark Streaming -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.12</artifactId>
            <version>3.2.1</version>
        </dependency>
        <!-- Spark Streaming integration for Kafka -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.12</artifactId>
            <version>3.2.1</version>
        </dependency>
        <!-- GraphX -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.12</artifactId>
            <version>3.2.1</version>
        </dependency>

        <!-- Libraries needed for Spark to read/write HBase ================================================================== -->
        <!-- Hadoop common API -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.3.1</version>
        </dependency>
        <!-- Hadoop client API -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.3.1</version>
        </dependency>
        <!-- HBase client API -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.4.9</version>
        </dependency>
        <!-- HBase MapReduce API -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-mapreduce</artifactId>
            <version>2.4.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>2.4.9</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.4</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.specs</groupId>
            <artifactId>specs</artifactId>
            <version>1.2.5</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.scala-tools/maven-scala-plugin -->
        <dependency>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <version>2.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.maven.plugins/maven-eclipse-plugin -->
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-eclipse-plugin</artifactId>
            <version>2.5.1</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                    <args>
                        <arg>-target:jvm-1.5</arg>
                    </args>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-eclipse-plugin</artifactId>
                <configuration>
                    <downloadSources>true</downloadSources>
                    <buildcommands>
                        <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
                    </buildcommands>
                    <additionalProjectnatures>
                        <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
                    </additionalProjectnatures>
                    <classpathContainers>
                        <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
                        <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
                    </classpathContainers>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <reporting>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
            </plugin>
        </plugins>
    </reporting>
</project>
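
The build section above binds the scala-tools maven-scala-plugin to the compile and testCompile goals and points the source directories at src/main/scala and src/test/scala, so a plain Maven build should compile the Scala sources. A minimal sketch, assuming it is run from the spark-lesson directory (note the scala-tools.org repository referenced above may no longer be reachable, in which case the plugin has to come from a mirror or a local cache):

```bash
# Build the spark-lesson module; Scala sources under src/main/scala
# are compiled by the maven-scala-plugin configured in the pom above.
mvn -DskipTests clean package
```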


@@ -0,0 +1,7 @@
package org.example;

public class Main {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
}


@@ -0,0 +1,22 @@
log4j.rootCategory=ERROR,console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Set the default spark-shell log level to ERROR. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=ERROR
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=ERROR
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=ERROR
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=ERROR
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR


@@ -0,0 +1,5 @@
hadoop hadoop java
python spark
flink java
flink mysql
java spark


@@ -0,0 +1,86 @@
package date_20250401

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object task {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local") // run locally (1 CPU core by default)
      .setAppName("testRdd") // set the application name
    // Every Spark program starts from a SparkContext
    val sc = new SparkContext(conf)

    // 1. map: given the sequence Seq(2, 3, 4, 5, 6), use map to multiply each element by a number
    //    (the last two digits of your student ID) and print the result
    println("----1----")
    // val data1: RDD[Int] = sc.parallelize(Seq(2, 3, 4, 5, 6))
    // data1.map(_ * 28).foreach(println)

    // 2. flatMap: given the sequence Seq("Hello lily", "Hello lucy", "Hello <your name in pinyin>"),
    //    use flatMap to split on spaces and output a new sequence of words
    println("----2----")
    val data2: RDD[String] = sc.parallelize(Seq("Hello lily", "Hello lucy", "Hello xulongjin"))
    data2.flatMap(_.split(" ")).foreach(println)

    // 3. filter: given the sequence Seq(4, 5, 62, 31, 4, 50, 6), use filter to keep the elements greater than 10
    println("----3----")
    val data3: RDD[Int] = sc.parallelize(Seq(4, 5, 62, 31, 4, 50, 6))
    data3.filter(_ > 10).foreach(println)

    // 4. mapValues: given the sequence Seq(("a",11), ("b",21), ("c",43)), multiply each value by 20
    println("----4----")
    val data4: RDD[(String, Int)] = sc.parallelize(Seq(("a", 11), ("b", 21), ("c", 43)))
    data4.mapValues(_ * 20).foreach(println)

    // 5. sample: given the sequence Seq(1, 2, ..., 10), draw a sample (parameters of your choice)
    println("----5----")
    val data5: RDD[Int] = sc.parallelize(Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
    data5.sample(withReplacement = true, 0.7, 3).foreach(println)

    // 6. union: define two sequences of your own and combine them with union
    println("----6----")
    val data6_1: RDD[Int] = sc.parallelize(Seq(1, 2, 3, 4, 5))
    val data6_2: RDD[Int] = sc.parallelize(Seq(6, 7, 8, 9, 10))
    val data6_re: RDD[Int] = data6_1.union(data6_2)
    data6_re.foreach(println)

    // 7. intersection: define two sequences of your own and compute their intersection
    println("----7----")
    val data7_1: RDD[Int] = sc.parallelize(Seq(1, 2, 3, 4, 5, 6, 7, 8))
    val data7_2: RDD[Int] = sc.parallelize(Seq(2, 4, 6, 8, 10))
    data7_1.intersection(data7_2).foreach(println)

    // 8. distinct: define a sequence of your own and remove duplicates with distinct
    println("----8----")
    val data8: RDD[Int] = sc.parallelize(Seq(1, 2, 3, 4, 5, 6, 7, 8, 2, 4, 6, 8, 10))
    data8.distinct().foreach(println)

    // 9. reduceByKey: given Seq(("a",1), ("b",1), ("c",1), ("a",1), ("d",1), ("c",1)), sum the values per key
    println("----9----")
    val data9: RDD[(String, Int)] = sc.parallelize(Seq(("a", 1), ("b", 1), ("c", 1), ("a", 1), ("d", 1), ("c", 1)))
    data9.reduceByKey(_ + _).foreach(println)

    // 10. groupByKey: given the same sequence, group the values per key
    println("----10----")
    val data10: RDD[(String, Int)] = sc.parallelize(Seq(("a", 1), ("b", 1), ("c", 1), ("a", 1), ("d", 1), ("c", 1)))
    data10.groupByKey().foreach(println)

    // 11. join: define two key-value sequences of your own and join them
    println("----11----")
    val data11_1: RDD[(String, Int)] = sc.parallelize(Seq(("a", 1), ("a", 2), ("b", 1)))
    val data11_2: RDD[(String, Int)] = sc.parallelize(Seq(("a", 10), ("a", 11), ("a", 12)))
    data11_1.join(data11_2).foreach(println)

    // 12. sortBy: given Seq(("e", 4), ("a", 3), ("b", 2), ("c", 1), ("d", 5)),
    //     sort by key, then by value in descending order
    println("----12----")
    val data12: RDD[(String, Int)] = sc.parallelize(Seq(("e", 4), ("a", 3), ("b", 2), ("c", 1), ("d", 5)))
    data12.sortBy(_._1).foreach(println)
    println("----------")
    data12.sortBy(_._2, false).foreach(println)
  }
}


@@ -0,0 +1,22 @@
package date_20250401

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object task2 {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("testRdd")
    val sc = new SparkContext(conf)
    // Word count over a local text file (note: the path below is absolute and machine-specific)
    val data: RDD[String] = sc.textFile("/Volumes/Data/04CodeData/gcc-project-25-2/spark-lesson/src/main/scala/date_20250401/data/word")
    data.flatMap(_.split(" "))  // split each line into words
      .map(x => (x, 1))         // pair each word with a count of 1
      .reduceByKey(_ + _)       // sum the counts per word
      .sortBy(_._2, false)      // sort by count, descending
      .collect()
      .foreach(println)
  }
}
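
For reference, assuming the absolute path above points at the data/word file shown earlier in this commit, task2 should print one (word, count) pair per line in descending order of count, roughly as follows (the relative order of words with equal counts is not guaranteed):

```
(java,3)
(hadoop,2)
(spark,2)
(flink,2)
(python,1)
(mysql,1)
```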


@@ -0,0 +1,30 @@
package date_20250408

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WCLocalFile {
  def main(args: Array[String]) {
    /**
     * Initializing a SparkContext requires a SparkConf object.
     * SparkConf holds the various configuration parameters of the Spark cluster.
     */
    val conf = new SparkConf().setMaster("local") // run locally (1 CPU core by default)
      // .setMaster("local[2]") // run locally with 2 cores
      // .setMaster("local[*]") // run locally with all available cores
      .setAppName("Spark-WordCount") // set the application name
    // Every Spark program starts from a SparkContext
    val sc = new SparkContext(conf)

    // Read a local file
    val data = sc.textFile("spark-lesson/src/main/scala/date_20250408/data/README.md")
    val result: RDD[(String, Int)] = data.flatMap(_.split(" ")) // the underscore is a placeholder; flatMap splits each input line into words
      .map((_, 1)) // turn each word into a key-value pair: the word is the key, the value is 1
      .reduceByKey(_ + _) // add up the values of entries with the same key
    // Save the result to a local directory
    result.saveAsTextFile("spark-lesson/out/result")
  }
}


@@ -0,0 +1,31 @@
package date_20250408

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/** Spark word count program */
object WordCount {
  def main(args: Array[String]): Unit = {
    // Create a SparkConf object to hold the application configuration
    val conf = new SparkConf()
    // Set the application name (shown in the Spark Web UI)
    conf.setAppName("Spark-WordCount")
    // Set the cluster master URL
    // conf.setMaster("local")
    conf.setMaster("spark://hadoop102:7077")
    // Create the SparkContext, the entry point for submitting a Spark application
    val sc = new SparkContext(conf)
    // Read the file at the given path (the first program argument) into an RDD
    val linesRDD: RDD[String] = sc.textFile(args(0))
    // Split each element of the RDD on spaces and flatten the results into a new RDD
    val wordsRDD: RDD[String] = linesRDD.flatMap(_.split(" "))
    // Pair each word with the number 1: (word, 1)
    val paresRDD: RDD[(String, Int)] = wordsRDD.map((_, 1))
    // Aggregate by key, summing the values of identical keys
    val wordCountsRDD: RDD[(String, Int)] = paresRDD.reduceByKey(_ + _)
    // Sort by word count in descending order
    val wordCountsSortRDD: RDD[(String, Int)] = wordCountsRDD.sortBy(_._2, false)
    // Save the result to the given path (the second program argument)
    wordCountsSortRDD.saveAsTextFile(args(1))
    // Stop the SparkContext to end the job
    sc.stop()
  }
}
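
Because WordCount hard-codes the standalone master URL spark://hadoop102:7077 and takes its input and output paths from args(0) and args(1), it is meant to be packaged and submitted to the cluster rather than run directly from the IDE. A sketch of such a submission; the jar path and the HDFS output directory are placeholders, not part of this commit:

```bash
spark-submit \
  --class date_20250408.WordCount \
  --master spark://hadoop102:7077 \
  target/spark-lesson-1.0-SNAPSHOT.jar \
  hdfs://hadoop102:8020/input/wordcount2.txt \
  hdfs://hadoop102:8020/output/wordcount
```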


@@ -0,0 +1,32 @@
package date_20250408

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/** Spark word count program */
object WordCountHdfsFile {
  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "root")
    // Create a SparkConf object to hold the application configuration
    val conf = new SparkConf().setMaster("local") // run locally (1 CPU core by default)
      .setAppName("Spark-WordCount")
    // Create the SparkContext, the entry point for submitting a Spark application
    val sc = new SparkContext(conf)
    // Read the file at the given path into an RDD
    // val linesRDD: RDD[String] = sc.textFile(args(0))
    val linesRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/wordcount2.txt")
    // Split each element of the RDD on spaces and flatten the results into a new RDD
    val wordsRDD: RDD[String] = linesRDD.flatMap(_.split(" "))
    // Pair each word with the number 1: (word, 1)
    val paresRDD: RDD[(String, Int)] = wordsRDD.map((_, 1))
    // Aggregate by key, summing the values of identical keys
    val wordCountsRDD: RDD[(String, Int)] = paresRDD.reduceByKey(_ + _)
    // Sort by word count in descending order
    val wordCountsSortRDD: RDD[(String, Int)] = wordCountsRDD.sortBy(_._2, false)
    // Save the result to HDFS
    // wordCountsSortRDD.saveAsTextFile(args(1))
    wordCountsSortRDD.saveAsTextFile("hdfs://hadoop102:8020/output/result1")
    // Stop the SparkContext to end the job
    sc.stop()
  }
}
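
WordCountHdfsFile reads from and writes to fixed HDFS locations as user root. Assuming a running HDFS at hadoop102:8020, the input could be staged and the result inspected with commands along these lines (the local file name is a placeholder):

```bash
hdfs dfs -mkdir -p /input
hdfs dfs -put wordcount2.txt /input/
# after the job finishes, saveAsTextFile leaves part-* files under the output directory
hdfs dfs -cat /output/result1/part-00000
```

Note that the output directory must not already exist, otherwise saveAsTextFile fails.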


@@ -0,0 +1,125 @@
# Apache Spark
Spark is a unified analytics engine for large-scale data processing. It provides
high-level APIs in Scala, Java, Python, and R, and an optimized engine that
supports general computation graphs for data analysis. It also supports a
rich set of higher-level tools including Spark SQL for SQL and DataFrames,
pandas API on Spark for pandas workloads, MLlib for machine learning, GraphX for graph processing,
and Structured Streaming for stream processing.
<https://spark.apache.org/>
[![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_main.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_main.yml)
[![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark)
[![PySpark Coverage](https://codecov.io/gh/apache/spark/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/spark)
[![PyPI Downloads](https://static.pepy.tech/personalized-badge/pyspark?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads)](https://pypi.org/project/pyspark/)
## Online Documentation
You can find the latest Spark documentation, including a programming
guide, on the [project web page](https://spark.apache.org/documentation.html).
This README file only contains basic setup instructions.
## Building Spark
Spark is built using [Apache Maven](https://maven.apache.org/).
To build Spark and its example programs, run:
```bash
./build/mvn -DskipTests clean package
```
(You do not need to do this if you downloaded a pre-built package.)
More detailed documentation is available from the project site, at
["Building Spark"](https://spark.apache.org/docs/latest/building-spark.html).
For general development tips, including info on developing Spark using an IDE, see ["Useful Developer Tools"](https://spark.apache.org/developer-tools.html).
## Interactive Scala Shell
The easiest way to start using Spark is through the Scala shell:
```bash
./bin/spark-shell
```
Try the following command, which should return 1,000,000,000:
```scala
scala> spark.range(1000 * 1000 * 1000).count()
```
## Interactive Python Shell
Alternatively, if you prefer Python, you can use the Python shell:
```bash
./bin/pyspark
```
And run the following command, which should also return 1,000,000,000:
```python
>>> spark.range(1000 * 1000 * 1000).count()
```
## Example Programs
Spark also comes with several sample programs in the `examples` directory.
To run one of them, use `./bin/run-example <class> [params]`. For example:
```bash
./bin/run-example SparkPi
```
will run the Pi example locally.
You can set the MASTER environment variable when running examples to submit
examples to a cluster. This can be a mesos:// or spark:// URL,
"yarn" to run on YARN, and "local" to run
locally with one thread, or "local[N]" to run locally with N threads. You
can also use an abbreviated class name if the class is in the `examples`
package. For instance:
```bash
MASTER=spark://host:7077 ./bin/run-example SparkPi
```
Many of the example programs print usage help if no params are given.
## Running Tests
Testing first requires [building Spark](#building-spark). Once Spark is built, tests
can be run using:
```bash
./dev/run-tests
```
Please see the guidance on how to
[run tests for a module, or individual tests](https://spark.apache.org/developer-tools.html#individual-tests).
There is also a Kubernetes integration test, see resource-managers/kubernetes/integration-tests/README.md
## A Note About Hadoop Versions
Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
storage systems. Because the protocols have changed in different versions of
Hadoop, you must build Spark against the same version that your cluster runs.
Please refer to the build documentation at
["Specifying the Hadoop Version and Enabling YARN"](https://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version-and-enabling-yarn)
for detailed guidance on building for a particular distribution of Hadoop, including
building for particular Hive and Hive Thriftserver distributions.
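For example, a build pinned to a particular Hadoop release typically passes the hadoop.version property (a sketch; the available profiles vary between Spark versions, so check the linked build documentation):
```bash
./build/mvn -Pyarn -Dhadoop.version=3.3.1 -DskipTests clean package
```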
## Configuration
Please refer to the [Configuration Guide](https://spark.apache.org/docs/latest/configuration.html)
in the online documentation for an overview on how to configure Spark.
## Contributing
Please review the [Contribution to Spark guide](https://spark.apache.org/contributing.html)
for information on how to get started contributing to the project.


@@ -0,0 +1,5 @@
Hadoop Common The common utilities that support the other Hadoop modules
Hadoop Distributed File System HDFS A distributed file system that provides high-throughput access to application data
Hadoop YARN A framework for job scheduling and cluster resource management
Hadoop MapReduce A YARN-based system for parallel processing of large data sets
Who Uses Hadoop


@@ -0,0 +1,6 @@
object test {
  def main(args: Array[String]): Unit = {
    println("Hello world")
  }
}

5
src/Main.java Normal file

@@ -0,0 +1,5 @@
public class Main {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
}