@samthebest , , Spark (Hadoop). :
: fooobar.com/questions/557391/...
zip
, zip, . , .
, , : fooobar.com/questions/557381/...
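The solution is to use sc.binaryFiles, which hands each file over as a PortableDataStream, and to read the archive entries through a ZipInputStream: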
sc.binaryFiles(path, minPartitions)
.flatMap { case (name: String, content: PortableDataStream) =>
val zis = new ZipInputStream(content.open)
Stream.continually(zis.getNextEntry)
.takeWhile(_ != null)
.flatMap { _ =>
val br = new BufferedReader(new InputStreamReader(zis))
Stream.continually(br.readLine()).takeWhile(_ != null)
}