Useful Scala Code Snippets

19-04-26 编程 #code #scala

merge two map and sum its values

多个 map 合并,key 相同时则 value 相加

val map1 = Map(1 -> 1, 2 -> 2)
val map2 = Map(1 -> 11, 3 -> 3)
val map3 = Map(1 -> 111, 3 -> 3)

val mapList = List(map1, map2, map3)

val merged = mapList.reduce((m1, m2) =>
  m1 ++ m2.map { case (k, v) => k -> (v + m1.getOrElse(k, 0)) }
)

文件读

// """"""可以避免\\符号
val file = """d:\data\file.txt"""
for (line <- Source.fromFile(file, encoding).getLines()) {
    print(line)
}

文件写

  //资源管理
  def using[A <: {def close() : Unit}, R](resource: A)(fun: A => R): R = {
    import scala.language.reflectiveCalls
    try {
      fun(resource)
    } finally {
      resource.close()
    }
  }
  
  using(new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8)) {
      writer => writer.write(s"""${line}\n""")
  }

统计词频

 val nanoUnit = 1000000

  //分词并统计词频
  def main(args: Array[String]): Unit = {
    val path = """D:\code\ideaProjects\scala-notes\data\src\out"""
    val files: List[File] = new File(path).listFiles.filter(_.isFile).toList
    val start = System.nanoTime()
    val wfList = ListBuffer[mutable.Map[String, Long]]()
    val futures = for (file <- files) yield Future {
      countWrodsInFile(file, "UTF-8")
    }
    for (f <- futures) {
      val words: mutable.Map[String, Long] = Await.result(f, Duration.Inf)
      wfList += words
    }
    //merge the word frequency map
    val finalWf = wfList.reduce((m1, m2) =>
      m1 ++ m2.map { case (k, v) => k -> (v + m1.getOrElse(k, 0L)) }
    )
    val end = System.nanoTime()
    println(s"container size=${finalWf.size}")
    // sort map
    val wordsFreq = finalWf.toList.sortWith(_._2 > _._2)

    write2file(wordsFreq, Paths.get(path, "final.txt").toFile)
    println(s"total used time = ${(end - start) / nanoUnit} ms")
    println(s"cups = ${Runtime.getRuntime.availableProcessors()}")
  }

  def countWrodsInFile(file: File, encoding: String): mutable.Map[String, Long] = {
    val wf = mutable.Map[String, Long]().withDefaultValue(0)
    for (line <- Source.fromFile(file, encoding).getLines()) {
      val l = line.trim
      wf.update(l, wf(l) + 1)
    }
    println(s"${file.getName} has words:${wf.size}")
    wf
  }

  def write2file(wf: Seq[(String, Long)], out: File): Unit = {
    using(new OutputStreamWriter(new FileOutputStream(out), StandardCharsets.UTF_8)) {
      writer =>
        for (it <- wf) {
          writer.write(s"""${it._1} ${it._2}\n""")
        }
    }
  }