Why does my Haskell program end up with a memory error?

I'm trying to write a Haskell program to parse a huge text file (about 14 GB), but I can't figure out how to free unused data from memory, or how to keep the stack from overflowing during the foldr. Here is the source of the program:

import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.ByteString.Lex.Lazy.Double as BD
import System.Environment


-- | A point described by three 'Double' coordinates.
-- None of the fields carries a strictness annotation, so each coordinate is
-- stored lazily.
data Vertex = 
    Vertex{
     vertexX :: Double,
     vertexY :: Double,
     vertexZ :: Double}
    deriving (Eq, Show, Read)

-- | A pair of vertices, named 'extentMax' and 'extentMin'.
-- Neither field carries a strictness annotation, so both vertices are stored
-- lazily.
data Extent = 
    Extent{
     extentMax :: Vertex,
     extentMin :: Vertex}
    deriving (Eq, Show, Read)

-- | Combine an 'Extent' with a 'Vertex' into a new 'Extent'.
--
-- Two vertices are built component-wise by the local helper @makeCmpVert@:
-- one applies 'max' to the coordinates of @extentMax ext@ and @vert@, the
-- other applies 'min' to the coordinates of @extentMin ext@ and @vert@.
--
-- NOTE(review): the tuple pattern binds the 'max' result to @vertMin@ and the
-- 'min' result to @vertMax@, and @vertMax@ is passed as the first
-- ('extentMax') field. The 'min' result therefore lands in 'extentMax' and
-- the 'max' result in 'extentMin'. This looks like swapped names -- confirm
-- the intent.
addToExtent :: Extent -> Vertex -> Extent
addToExtent ext vert = Extent vertMax vertMin where
                        (vertMin, vertMax) = (makeCmpVert max (extentMax ext) vert, makeCmpVert min (extentMin ext) vert) where
                            makeCmpVert f v1 v2 = Vertex(f (vertexX v1) (vertexX v2))
                                                        (f (vertexY v1) (vertexY v2))
                                                        (f (vertexZ v1) (vertexZ v2))

-- | Read a 'Double' from the front of a lazy 'LBS.ByteString'.
-- Whatever 'BD.readDouble' leaves unconsumed is dropped, and a failed parse
-- yields 0.
readCoord :: LBS.ByteString -> Double
readCoord = maybe 0 fst . BD.readDouble

-- | Split a line on single space characters and parse every piece with
-- 'readCoord'. An empty line yields an empty list.
readCoords :: LBS.ByteString -> [Double]
readCoords l
    -- 'LBS.null' is a constant-time emptiness test, whereas comparing
    -- 'LBS.length' with 0 walks every chunk of the lazy ByteString first.
    | LBS.null l = []
    | otherwise  = map readCoord (LBS.split ' ' l)

-- | Build a 'Vertex' from the first three values that 'readCoords' returns
-- for a line. The values are picked out with 'head' and '!!', so a line with
-- fewer than three values raises an error when the missing coordinate is
-- evaluated.
parseLine :: LBS.ByteString -> Vertex
parseLine line =
    let coords = readCoords line
        at = (coords !!)
    in Vertex (head coords) (at 1) (at 2)

-- | Fold every line into the starting 'Extent' with a right fold ('foldr').
-- Each line is turned into a 'Vertex' by 'parseLine' and merged by
-- 'addToExtent'. The lambda's second argument @y@ is the extent produced for
-- the lines to the right of @x@, with @ext@ as the seed at the end of the
-- list.
processLines :: [LBS.ByteString] -> Extent -> Extent
processLines strs ext = foldr (\x y -> addToExtent y (parseLine x)) ext strs

-- | Print the file name, the record count taken from the file's first line,
-- and the 'Extent' computed over all remaining lines.
--
-- The contents are read with the lazy 'LBS.readFile'. The extent is seeded
-- with the vertex parsed from the first record line and then folded over
-- every record line by 'processLines'.
--
-- Malformed input is reported instead of crashing: a file with no lines or
-- with an unreadable count line prints "Can't read records count", and a
-- file with a count line but no record lines prints "No records found".
processFile :: String -> IO()
processFile name = do
    putStrLn name
    content <- LBS.readFile name
    -- Matching on the list forces only its first cell, so the rest of the
    -- file is still consumed lazily.
    case LBS.lines content of
        [] -> putStrLn "Can't read records count"
        countLine : recordsLines ->
            case LBS.readInt countLine of
                Nothing -> putStrLn "Can't read records count"
                Just (recordsCount, _) -> do
                    print recordsCount
                    case recordsLines of
                        [] -> putStrLn "No records found"
                        firstLine : _ -> do
                            let vert = parseLine firstLine
                                ext = Extent vert vert
                            -- The fold still runs over every record line,
                            -- including the one used for the seed.
                            print $ processLines recordsLines ext

-- | Entry point: run 'processFile' on the first command-line argument, or
-- print a message when no argument was given.
main :: IO()
main = getArgs >>= dispatch
  where
    dispatch []         = putStrLn "Missing file path"
    dispatch (path : _) = processFile path

The text file contains lines with three floating-point numbers separated by a space character. The program always ends up consuming all the free memory on the computer and then fails with an out-of-memory error.

+5
source share
2 answers

You are being too lazy. Vertex and Extent have non-strict fields, and all your functions that return a Vertex return

Vertex thunk1 thunk2

without forcing the components to be evaluated. Likewise, addToExtent directly returns an

Extent thunk1 thunk2

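without evaluating its components.

As a result, none of the ByteString chunks can be released early for garbage collection, because the Doubles have not been parsed from them yet.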

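Once that is fixed by making the fields of Vertex and Extent strict - or by making the functions that return a Vertex or an Extent force all parts of their result - you run into the problem that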

processLines strs ext = foldr (\x y -> addToExtent y (parseLine x)) ext strs

cannot start assembling its result before the end of the list of lines is reached, because then

(\x y -> addToExtent y (parseLine x))

is strict in its second argument.

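However, barring NaNs and undefined values, and if I have not missed something, the result is the same if you use a (strict!) left fold, so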

-- | Strict left fold ('foldl'') of 'addToExtent' over the parsed lines,
-- starting from @ext@; the accumulated extent is the first argument of the
-- step function.
processLines strs ext = foldl' step ext strs
  where
    step acc line = addToExtent acc (parseLine line)

should produce the desired result without holding on to the data, provided Vertex and Extent get strict fields.


Ah, I did miss something:

addToExtent ext vert = Extent vertMax vertMin
  where
    (vertMin, vertMax) = (makeCmpVert max (extentMax ext) vert, makeCmpVert min (extentMin ext) vert)

If that is not a typo (which I expect it is), fixing that would be somewhat difficult.

I think it should be

    (vertMax, vertMin) = ...
+5

addToExtent is too lazy. A possible alternative definition is:

-- | Extend an 'Extent' with a 'Vertex'.
--
-- 'extentMax' of the result is the component-wise 'max' of the previous
-- 'extentMax' and @vert@; 'extentMin' is the component-wise 'min' of the
-- previous 'extentMin' and @vert@.
--
-- Both vertices are forced with 'seq' before the 'Extent' is built, so the
-- result never holds an unevaluated vertex. 'seq' evaluates only as far as
-- the 'Vertex' constructor; the coordinates themselves are evaluated at that
-- point only if the fields of 'Vertex' are strict.
addToExtent :: Extent -> Vertex -> Extent
addToExtent ext vert = vertMax `seq` vertMin `seq` Extent vertMax vertMin
  where
    -- Each name is bound to the computation it describes: 'Extent' takes the
    -- maximum first and the minimum second.
    vertMax = makeCmpVert max (extentMax ext) vert
    vertMin = makeCmpVert min (extentMin ext) vert
    -- Apply @f@ coordinate by coordinate to two vertices.
    makeCmpVert f v1 v2 =
      Vertex
        (f (vertexX v1) (vertexX v2))
        (f (vertexY v1) (vertexY v2))
        (f (vertexZ v1) (vertexZ v2))

-- | A point with three coordinates. Every coordinate is a strict, unpacked
-- 'Double', so building a 'Vertex' evaluates all three values.
data Vertex = Vertex
    { vertexX :: {-# UNPACK #-} !Double
    , vertexY :: {-# UNPACK #-} !Double
    , vertexZ :: {-# UNPACK #-} !Double
    }
    deriving (Eq, Show, Read)

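The problem is that vertMin and vertMax are never evaluated until the entire file has been processed - resulting in two huge thunks in the Extent.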

I also recommend changing the definition of Extent to

-- | A pair of vertices, named 'extentMax' and 'extentMin'. Both fields are
-- strict, so constructing an 'Extent' evaluates each 'Vertex' to its
-- constructor.
data Extent = Extent
    { extentMax :: !Vertex
    , extentMin :: !Vertex
    }
    deriving (Eq, Show, Read)

(although with these changes, the seq calls in addToExtent become redundant).

+1

All Articles