Why does my program use so much memory?

For an entire 25 megabyte file, memory usage is constant at 792 MB! I thought this was due to my use from the list, but moving certain parts of the code for the vector (for example, arrays where fft is used) did not change how much memory is used at all!

{-# LANGUAGE OverloadedStrings,BangPatterns #-}
import qualified Data.Attoparsec.Char8 as Ap
import Data.Attoparsec
import Control.Monad
import Control.Applicative
--import Control.DeepSeq (force)
import System.IO 
import System.Environment
import Data.List (zipWith4,unzip4,zip4,foldl')
import Data.Bits
import Data.Complex
import Data.String (fromString)
import Data.ByteString.Internal
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as Bl 
import qualified Data.Vector.Unboxed as Vu
import qualified Statistics.Transform  as St



{-
I run a test on a collection of data from a file
[(1,t),(2,t),(3,t),(4,t),(5,t)]
   -     -     - 
   |     -     -     -
   |     |     -     -     -
   |     |     |
 [y++t,  n,  y++t]
To do that, I use splitN to create a list of list
[[(1,t),(2,t),(3,t)],[(2,t),(3,t),(4,t)],[(3,t),(4,t),(5,t)]]
Map a serie of functions to determine a value for each inner collection,
and return when an event happened.



-}

data FourD b a = FourD  a a a b

instance Functor (FourD c) where  
    fmap f (FourD x y z d) = FourD  (f x) (f y) (f z) d  

mgrav_per_bit = [ 18, 36, 71, 143, 286, 571, 1142 ]
--Converting raw data to mg
aToG :: Int -> Double    
aToG a = fromIntegral . sign $  uresult 
    where   
        twocomp = if a>128
                  then 256-a
                  else a
        uresult = sum  $ zipWith (*)   mgrav_per_bit (map (fromEnum . testBit  twocomp) [0..7])
        sign = if a > 128 
               then negate 
               else id


--Data is (int,int,int,time)
--Converted to (St.CD^3,Bytestring) in place of maping afterwards.                  
parseAcc :: Parser (FourD B.ByteString St.CD)
parseAcc = do   Ap.char '('
                x <-  fmap ((:+0) . aToG) Ap.decimal  
                Ap.char ','
                y <-  fmap ((:+0) . aToG) Ap.decimal
                Ap.char ','
                z <-  fmap ((:+0) . aToG) Ap.decimal
                Ap.char ','
                time <- takeTill (== 41)
                Ap.char ')'
                return $! FourD x y z time
--applies parseAcc to many lines, fails at the end of file (Need to add a newline)
parseFile = many $ parseAcc <* (Ap.endOfInput <|> Ap.endOfLine)


readExpr input = case parse parseFile  input of
     Done b val -> val
     Partial p -> undefined
     Fail a b c -> undefined 

unType  (FourD  x y d z) = (x ,y ,d ,z)          


-- Breaks a list of FourD into smaller lists, apply f and g to those lists, then filter the result based if an even happened or not
amap  :: (Num c, Ord c) =>     ([a] -> [c]) -> ([d] -> [ByteString]) -> [FourD d a] -> [Bl.ByteString]
amap f g = (uncurry4 (zipWith4 (filterAcc))). map4 f g . unzip4 . map (unType)
    where map4 f g (a,b,c,d) = (f a,f b,f c,g d)
          uncurry4 f (a,b,c,d) = f a b c d 

-- before  i had map filterAcc,outside amap. Tried to fuse everything to eliminate intermediaries

-- An event is detected if x > 50
filterAcc  x y z t = if x > 50
                                then  (Bl.pack . B.unpack) $ "yes: " `B.append`  t  
                                else  ""
-- split [St.CD] in [(Vector St.CD)], apply fft to each, and compress to a single value. 
-- Core of the application
fftAcross :: [St.CD] -> [Int]
fftAcross = map (floor . noiseEnergy .  St.fft) . splitN 32 

-- how the value is determined (sum of all magnitudes but the first one)
noiseEnergy  :: (RealFloat a, Vu.Unbox a) => Vu.Vector (Complex a) -> a
noiseEnergy  x = (Vu.foldl' (\b a-> b+(magnitude a)) 0 (Vu.drop 1 x))/32

-- how the values are split in (Vector St.CD), if lenght > 32, takes 32, otherwhise I'm done
splitN :: Vu.Unbox a => Int -> [a] -> [Vu.Vector a]
splitN n x =  helper x 
    where
    helper x   = if     atLeast n x 
                 then   (Vu.take n (Vu.fromList x)) : (helper  (drop 1 x) )
                 else  []
-- Replacing the test by atLeast in place of a counter (that compared to length x,calculated once) reduced the behaviour that memory usage was constant.     

-- this is replicated so the behaviour of splitN happens on the time part of FourD, Can't use the same since there is no Vector Bytestring instance                
splitN2 n x =  helper x 
    where
    helper x   = if   atLeast n x 
                 then  (head   x) : (helper  (drop 1 x))
                 else  []

atLeast :: Int -> [a] -> Bool
atLeast 0 _      = True
atLeast _ []     = False
atLeast n (_:ys) = atLeast (n-1) ys



main = do    

    filename <- liftM head getArgs
    filehandle <- openFile "results.txt" WriteMode
    contents <- liftM readExpr $ B.readFile filename
    Bl.hPutStr (filehandle) .  Bl.unlines .  splitAndApplyAndFilter  $ contents where
        splitAndApplyAndFilter  = amap fftAcross (splitN2 32)  

Edit: after some refactoring, fusing some cards, decreasing the length, I managed to get this working on 400 ~ with an input file of 25 MB. However, at 100 MB it takes up 1.5 GB.

The program is designed to determine whether any event occurred at a time, for which it requests a set of values โ€‹โ€‹(im using 32 atm), runs fft in it, sums up these values โ€‹โ€‹and sees if the threshold passes. If yes, type the time for the file.

http://db.tt/fT8kXPKz 25mb

+5
1

- reddit ! Haskell Attoparsec

, attoparsec , haskell ( 100 )

, , .

, , 120 800 ( 116 ), !

, -, :

readExpr input = case parse (parseAcc<*(Ap.endOfLine<*Ap.endOfInput<|>Ap.endOfLine)) input of
     Done b val -> val : readExpr b
     Partial  e -> []
     Fail _ _ c -> error c 

:

{-# LANGUAGE OverloadedStrings,BangPatterns #-}
import qualified Data.Attoparsec.Char8 as Ap
import Data.Attoparsec
import Control.Monad
import Control.Applicative
--import Control.DeepSeq (force)
import System.IO 
import System.Environment
import Data.List (zipWith4,unzip4,zip4,foldl')
import Data.Bits
import Data.Complex
import Data.String (fromString)
import Data.ByteString.Internal
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as Bl 
import qualified Data.Vector.Unboxed as Vu
import qualified Statistics.Transform  as St


{-
I run a test on a collection of data from a file
[(1,t),(2,t),(3,t),(4,t),(5,t)]
   -     -     - 
   |     -     -     -
   |     |     -     -     -
   |     |     |
 [y++t,  n,  y++t]
To do that, I use splitN to create a list of list
[[(1,t),(2,t),(3,t)],[(2,t),(3,t),(4,t)],[(3,t),(4,t),(5,t)]]
Map a serie of functions to determine a value for each inner collection,
and return when an event happened.



-}

data FourD b a = FourD  a a a b

instance Functor (FourD c) where  
    fmap f (FourD x y z d) = FourD  (f x) (f y) (f z) d  

mgrav_per_bit = [ 18, 36, 71, 143, 286, 571, 1142 ]
--Converting raw data to mg
aToG :: Int -> Double    
aToG a = fromIntegral . sign $  uresult 
    where   
        twocomp 
            | a>128     = 256-a
            | otherwise =     a
        uresult = sum  $ zipWith (*)   mgrav_per_bit (map (fromEnum . testBit  twocomp) [0..7])
        sign 
            | a > 128   = negate
            | otherwise =     id


--Data is (int,int,int,time)
--Converted to (St.CD^3,Bytestring) in place of maping afterwards.                  
parseAcc :: Parser (FourD B.ByteString St.CD)
parseAcc = do   Ap.char '('
                x <-  fmap ((:+0) . aToG) Ap.decimal  -- Parse, transform to mg, convert to complex
                Ap.char ','
                y <-  fmap ((:+0) . aToG) Ap.decimal
                Ap.char ','
                z <-  fmap ((:+0) . aToG) Ap.decimal
                Ap.char ','
                time <- takeTill (== 41)
                Ap.char ')'
                return $! FourD x y z time
--applies parseAcc to many lines, fails at the end of file (Need to add a newline)
parseFile = many $ parseAcc <* (Ap.endOfInput <|> Ap.endOfLine)


readExpr input = case parse (parseAcc<*(Ap.endOfLine<*Ap.endOfInput<|>Ap.endOfLine)) input of
     Done b val -> val : readExpr b
     Partial  e -> []
     Fail _ _ c -> error c 

unType  (FourD  x y d z) = (x ,y ,d ,z)          


-- Breaks a list of FourD into smaller lists, apply f and g to those lists, then filter the result based if an even happened or not
amap  :: (Num c, Ord c) =>     ([a] -> [c]) -> ([d] -> [ByteString]) -> [FourD d a] -> [ByteString]
amap f g = (uncurry4 (zipWith4 (filterAcc))). map4 f g . unzip4 . map (unType)
    where map4 f g (a,b,c,d) = (f a,f b,f c,g d)
          uncurry4 f (a,b,c,d) = f a b c d 

-- before  i had map filterAcc,outside amap. Tried to fuse everything to eliminate intermediaries

-- An event is detected if x > 50
filterAcc  x y z t 
              | x > 50    = t
              | otherwise = ""

-- split [St.CD] in [(Vector St.CD)], apply fft to each, and compress to a single value. 
-- Core of the application
fftAcross :: [St.CD] -> [Int]
fftAcross = map (floor . noiseEnergy .  St.fft) . splitN 32 


-- how the value is determined (sum of all magnitudes but the first one)
noiseEnergy  :: (RealFloat a, Vu.Unbox a) => Vu.Vector (Complex a) -> a
noiseEnergy  x = (Vu.foldl' (\b a-> b+(magnitude a)) 0 (Vu.drop 1 x))/32


-- how the values are split in (Vector St.CD), if lenght > 32, takes 32, otherwhise I'm done
splitN :: Vu.Unbox a => Int -> [a] -> [Vu.Vector a]
splitN n x =  helper x 
    where
    helper x   
            | atLeast n x = (Vu.take n (Vu.fromList x)) : (helper  (drop 1 x) )
            | otherwise   = []

-- Replacing the test by atLeast in place of a counter (that compared to length x,calculated once) reduced the behaviour that memory usage was constant.     

-- this is replicated so the behaviour of splitN happens on the time part of FourD, Can't use the same since there is no Vector Bytestring instance                
splitN2 n x =  helper x 
    where
    helper x   
            | atLeast n x = (head   x) : (helper  (drop 1 x))
            | otherwise   = []

atLeast :: Int -> [a] -> Bool
atLeast 0 _      = True
atLeast _ []     = False
atLeast n (_:ys) = atLeast (n-1) ys

intervalFinder :: [ByteString]->[B.ByteString]
intervalFinder x = helper x ""
    where
    helper (x:xs) "" 
        | x /= ""   = ("Start Time: " `B.append` x `B.append` "\n"):(helper xs x)
        | otherwise = helper xs ""
    helper (x:xs) y
        | x == ""   = ( "End   Time: "`B.append`  y `B.append` "\n\n" ):(helper xs "")
        | otherwise = helper xs x
    helper _ _      = []

main = do
    filename <- liftM head getArgs
    filehandle <- openFile "results.txt" WriteMode
    contents <- liftM readExpr $ B.readFile filename
    Bl.hPutStr (filehandle) .  Bl.fromChunks . intervalFinder . splitAndApplyAndFilter  $ contents 
    hClose filehandle
    where
         splitAndApplyAndFilter  = amap fftAcross (splitN2 32)  





    --contents <- liftM ((map ( readExpr )) . B.lines) $ B.readFile filename


   {-     *Main> let g = liftM ((amap fftAcross (splitN2 32)) . readExpr) $ B.readFile "te
stpattern2.txt"
-}

   -- B.hPutStrLn (filehandle)  . B.unlines . map (B.pack . show ) .  amap (map (floor .quare) .  (filter (/=[])) . map ( (drop 1) . (map (/32)) . fft ) . splitN 32) . map ( fmap(fromIntegral . aToG)) . map readExpr $ contents
+3

All Articles