{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash                  #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE TypeFamilies               #-}
{-# LANGUAGE UnboxedTuples              #-}
{-# LANGUAGE UnliftedFFITypes           #-}
{-# LANGUAGE Unsafe                     #-}
{-# LANGUAGE ViewPatterns               #-}

#if __GLASGOW_HASKELL__ >= 800
{-# LANGUAGE TemplateHaskellQuotes #-}
#else
{-# LANGUAGE TemplateHaskell #-}
#endif

-- |
-- Module      : Data.Text.Short.Internal
-- Copyright   : © Herbert Valerio Riedel 2017
-- License     : BSD3
--
-- Maintainer  : hvr@gnu.org
-- Stability   : stable
--
-- Memory-efficient representation of Unicode text strings.
--
-- @since 0.1
module Data.Text.Short.Internal
    ( -- * The 'ShortText' type
      ShortText(..)

      -- * Basic operations
    , null
    , length
    , isAscii
    , splitAt
    , splitAtEnd
    , indexEndMaybe
    , indexMaybe
    , isPrefixOf
    , stripPrefix
    , isSuffixOf
    , stripSuffix

    , cons
    , snoc
    , uncons
    , unsnoc

    , findIndex
    , find
    , all

    , span
    , spanEnd
    , split

    , intersperse
    , intercalate
    , reverse
    , replicate

    , filter
    , dropAround

    , foldl
    , foldl'
    , foldr
    , foldl1
    , foldl1'
    , foldr1

      -- * Conversions
      -- ** 'Char'
    , singleton

      -- ** 'String'
    , Data.Text.Short.Internal.fromString
    , toString

      -- ** 'T.Text'
    , fromText
    , toText

      -- ** 'BS.ByteString'
    , fromShortByteString
    , fromShortByteStringUnsafe
    , toShortByteString

    , fromByteString
    , fromByteStringUnsafe
    , toByteString

    , toBuilder

      -- * misc
      -- ** For Haddock

    , BS.ByteString
    , T.Text
    , module Prelude

      -- ** Internals
    , isValidUtf8
    ) where

import           Control.DeepSeq                (NFData)
import           Control.Monad.ST               (stToIO)
import           Data.Binary
import           Data.Bits
import qualified Data.ByteString                as BS
import qualified Data.ByteString.Builder        as BB
import           Data.ByteString.Short          (ShortByteString)
import qualified Data.ByteString.Short          as BSS
import qualified Data.ByteString.Short.Internal as BSSI
import           Data.Char                      (ord)
import           Data.Data                      (Data(..),constrIndex, Constr,
                                                 mkConstr, DataType, mkDataType,
                                                 Fixity(Prefix))
import           Data.Hashable                  (Hashable)
import           Data.Typeable                  (Typeable)
import qualified Data.List                      as List
import           Data.Maybe                     (fromMaybe, isNothing)
import           Data.Semigroup
import qualified Data.String                    as S
import qualified Data.Text                      as T
import           Foreign.C
import           GHC.Base                       (assert, unsafeChr)
import qualified GHC.CString                    as GHC
import           GHC.Exts                       (Addr#, ByteArray#, Int (I#),
                                                 Int#, MutableByteArray#,
                                                 Ptr (..), RealWorld, Word (W#))
import qualified GHC.Exts
import qualified GHC.Foreign                    as GHC
import           GHC.IO.Encoding
import           GHC.ST
import           Prelude                        hiding (all, any, break, concat,
                                                 drop, dropWhile, filter, foldl,
                                                 foldl1, foldr, foldr1, head,
                                                 init, last, length, null,
                                                 replicate, reverse, span,
                                                 splitAt, tail, take, takeWhile)
import           System.IO.Unsafe
import           Text.Printf                    (PrintfArg, formatArg,
                                                 formatString)

import qualified Language.Haskell.TH.Syntax     as TH

#if MIN_VERSION_text(2,0,0)
import qualified Data.Text.Internal             as TI
import qualified Data.Text.Array                as TA
#else
import qualified Data.Text.Encoding             as T
#endif

import qualified PrimOps

-- | A compact representation of Unicode strings.
--
-- A 'ShortText' value is a sequence of Unicode scalar values, as defined in
-- <http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=35 §3.9, definition D76 of the Unicode 5.2 standard >;
-- This means that a 'ShortText' is a list of (scalar) Unicode code-points (i.e. code-points in the range @[U+00 .. U+D7FF] ∪ [U+E000 .. U+10FFFF]@).
--
-- This type relates to 'T.Text' as 'ShortByteString' relates to 'BS.ByteString' by providing a more compact type. Please consult the documentation of "Data.ByteString.Short" for more information.
--
-- Currently, a boxed unshared 'T.Text' has a memory footprint of 6 words (i.e. 48 bytes on 64-bit systems) plus 2 or 4 bytes per code-point (due to the internal UTF-16 representation). Each 'T.Text' value which can share its payload with another 'T.Text' requires only 4 words additionally. Unlike 'BS.ByteString', 'T.Text' use unpinned memory.
--
-- In comparison, the footprint of a boxed 'ShortText' is only 4 words (i.e. 32 bytes on 64-bit systems) plus 1, 2, 3, or 4 bytes per code-point (due to the internal UTF-8 representation).
-- It can be shown that for realistic data <http://utf8everywhere.org/#asian UTF-16 has a space overhead of 50% over UTF-8>.
--
-- __NOTE__: The `Typeable` instance isn't defined for GHC 7.8 (and older) prior to @text-short-0.1.3@
--
-- @since 0.1
newtype ShortText = ShortText ShortByteString
                  deriving (Eq ShortText
Int -> ShortText -> Int
ShortText -> Int
forall a. Eq a -> (Int -> a -> Int) -> (a -> Int) -> Hashable a
hash :: ShortText -> Int
$chash :: ShortText -> Int
hashWithSalt :: Int -> ShortText -> Int
$chashWithSalt :: Int -> ShortText -> Int
Hashable,Semigroup ShortText
ShortText
[ShortText] -> ShortText
ShortText -> ShortText -> ShortText
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [ShortText] -> ShortText
$cmconcat :: [ShortText] -> ShortText
mappend :: ShortText -> ShortText -> ShortText
$cmappend :: ShortText -> ShortText -> ShortText
mempty :: ShortText
$cmempty :: ShortText
Monoid,ShortText -> ()
forall a. (a -> ()) -> NFData a
rnf :: ShortText -> ()
$crnf :: ShortText -> ()
NFData,NonEmpty ShortText -> ShortText
ShortText -> ShortText -> ShortText
forall b. Integral b => b -> ShortText -> ShortText
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: forall b. Integral b => b -> ShortText -> ShortText
$cstimes :: forall b. Integral b => b -> ShortText -> ShortText
sconcat :: NonEmpty ShortText -> ShortText
$csconcat :: NonEmpty ShortText -> ShortText
<> :: ShortText -> ShortText -> ShortText
$c<> :: ShortText -> ShortText -> ShortText
Data.Semigroup.Semigroup,Typeable)

-- | It exposes a similar 'Data' instance abstraction as 'T.Text' (see
-- discussion referenced there for more details), preserving the
-- @[Char]@ data abstraction at the cost of inefficiency.
--
-- @since 0.1.3
instance Data ShortText where
  gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ShortText -> c ShortText
gfoldl forall d b. Data d => c (d -> b) -> d -> c b
f forall g. g -> c g
z ShortText
txt = forall g. g -> c g
z String -> ShortText
fromString forall d b. Data d => c (d -> b) -> d -> c b
`f` (ShortText -> String
toString ShortText
txt)
  toConstr :: ShortText -> Constr
toConstr ShortText
_ = Constr
packConstr
  gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ShortText
gunfold forall b r. Data b => c (b -> r) -> c r
k forall r. r -> c r
z Constr
c = case Constr -> Int
constrIndex Constr
c of
    Int
1 -> forall b r. Data b => c (b -> r) -> c r
k (forall r. r -> c r
z String -> ShortText
fromString)
    Int
_ -> forall a. (?callStack::CallStack) => String -> a
error String
"gunfold"
  dataTypeOf :: ShortText -> DataType
dataTypeOf ShortText
_ = DataType
shortTextDataType

packConstr :: Constr
packConstr :: Constr
packConstr = DataType -> String -> [String] -> Fixity -> Constr
mkConstr DataType
shortTextDataType String
"fromString" [] Fixity
Prefix

shortTextDataType :: DataType
shortTextDataType :: DataType
shortTextDataType = String -> [Constr] -> DataType
mkDataType String
"Data.Text.Short" [Constr
packConstr]

instance Eq ShortText where
  {-# INLINE (==) #-}
  == :: ShortText -> ShortText -> Bool
(==) ShortText
x ShortText
y
    | Int
lx forall a. Eq a => a -> a -> Bool
/= Int
ly  = Bool
False
    | Int
lx forall a. Eq a => a -> a -> Bool
==  Int
0  = Bool
True
    | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# (ShortText -> ByteArray#
toByteArray# ShortText
x) Int#
0# (ShortText -> ByteArray#
toByteArray# ShortText
y) Int#
0# Int#
n# of
                    Int#
0# -> Bool
True
                    Int#
_  -> Bool
False
    where
      !lx :: Int
lx@(I# Int#
n#) = ShortText -> Int
toLength ShortText
x
      !ly :: Int
ly = ShortText -> Int
toLength ShortText
y

instance Ord ShortText where
  compare :: ShortText -> ShortText -> Ordering
compare ShortText
t1 ShortText
t2
    | Int
n forall a. Eq a => a -> a -> Bool
== Int
0  = forall a. Ord a => a -> a -> Ordering
compare Int
n1 Int
n2
    | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# ByteArray#
ba1# Int#
0# ByteArray#
ba2# Int#
0# Int#
n# of
        Int#
r# | Int# -> Int
I# Int#
r# forall a. Ord a => a -> a -> Bool
< Int
0 -> Ordering
LT
           | Int# -> Int
I# Int#
r# forall a. Ord a => a -> a -> Bool
> Int
0 -> Ordering
GT
           | Int
n1 forall a. Ord a => a -> a -> Bool
< Int
n2   -> Ordering
LT
           | Int
n1 forall a. Ord a => a -> a -> Bool
> Int
n2   -> Ordering
GT
           | Bool
otherwise -> Ordering
EQ
    where
      ba1# :: ByteArray#
ba1# = ShortText -> ByteArray#
toByteArray# ShortText
t1
      ba2# :: ByteArray#
ba2# = ShortText -> ByteArray#
toByteArray# ShortText
t2
      !n1 :: Int
n1 = ShortText -> Int
toLength ShortText
t1
      !n2 :: Int
n2 = ShortText -> Int
toLength ShortText
t2
      !n :: Int
n@(I# Int#
n#) = Int
n1 forall a. Ord a => a -> a -> a
`min` Int
n2

instance Show ShortText where
    showsPrec :: Int -> ShortText -> ShowS
showsPrec Int
p (ShortText ShortByteString
b) = forall a. Show a => Int -> a -> ShowS
showsPrec Int
p (TextEncoding -> ShortByteString -> String
decodeStringShort' TextEncoding
utf8 ShortByteString
b)
    show :: ShortText -> String
show (ShortText ShortByteString
b)        = forall a. Show a => a -> String
show        (TextEncoding -> ShortByteString -> String
decodeStringShort' TextEncoding
utf8 ShortByteString
b)

instance Read ShortText where
    readsPrec :: Int -> ReadS ShortText
readsPrec Int
p = forall a b. (a -> b) -> [a] -> [b]
map (\(String
x,String
s) -> (ShortByteString -> ShortText
ShortText forall a b. (a -> b) -> a -> b
$ TextEncoding -> String -> ShortByteString
encodeStringShort TextEncoding
utf8 String
x,String
s)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Read a => Int -> ReadS a
readsPrec Int
p

-- | @since 0.1.2
instance PrintfArg ShortText where
  formatArg :: ShortText -> FieldFormatter
formatArg ShortText
txt = forall a. IsChar a => [a] -> FieldFormatter
formatString forall a b. (a -> b) -> a -> b
$ ShortText -> String
toString ShortText
txt

-- | The 'Binary' encoding matches the one for 'T.Text'
#if MIN_VERSION_binary(0,8,1)
instance Binary ShortText where
    put :: ShortText -> Put
put = forall t. Binary t => t -> Put
put forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString
    get :: Get ShortText
get = do
        ShortByteString
sbs <- forall t. Binary t => Get t
get
        case ShortByteString -> Maybe ShortText
fromShortByteString ShortByteString
sbs of
          Maybe ShortText
Nothing -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Binary.get(ShortText): Invalid UTF-8 stream"
          Just ShortText
st -> forall (m :: * -> *) a. Monad m => a -> m a
return ShortText
st
#else
-- fallback via 'ByteString' instance
instance Binary ShortText where
    put = put . toByteString
    get = do
        bs <- get
        case fromByteString bs of
          Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
          Just st -> return st
#endif

-- | Since 0.1.3
instance TH.Lift ShortText where
    -- TODO: Use DeriveLift with bytestring-0.11.2.0
    lift :: forall (m :: * -> *). Quote m => ShortText -> m Exp
lift ShortText
t = [| fromString s |]
      where s :: String
s = ShortText -> String
toString ShortText
t

#if MIN_VERSION_template_haskell(2,17,0)
    liftTyped :: forall (m :: * -> *). Quote m => ShortText -> Code m ShortText
liftTyped = forall a (m :: * -> *). Quote m => m Exp -> Code m a
TH.unsafeCodeCoerce forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t (m :: * -> *). (Lift t, Quote m) => t -> m Exp
TH.lift
#elif MIN_VERSION_template_haskell(2,16,0)
    liftTyped = TH.unsafeTExpCoerce . TH.lift
#endif

-- | \(\mathcal{O}(1)\) Test whether a 'ShortText' is empty.
--
-- >>> null ""
-- True
--
-- prop> null (singleton c) == False
--
-- prop> null t == (length t == 0)
--
-- @since 0.1
null :: ShortText -> Bool
null :: ShortText -> Bool
null = ShortByteString -> Bool
BSS.null forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

-- | \(\mathcal{O}(n)\) Count the number of Unicode code-points in a 'ShortText'.
--
-- >>> length "abcd€"
-- 5
--
-- >>> length ""
-- 0
--
-- prop> length t >= 0
--
-- @since 0.1
length :: ShortText -> Int
length :: ShortText -> Int
length ShortText
st = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO CSize
c_text_short_length (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st))

foreign import ccall unsafe "hs_text_short_length" c_text_short_length :: ByteArray# -> CSize -> IO CSize

-- | \(\mathcal{O}(n)\) Test whether 'ShortText' contains only ASCII code-points (i.e. only U+0000 through U+007F).
--
-- This is a more efficient version of @'all' 'Data.Char.isAscii'@.
--
-- >>> isAscii ""
-- True
--
-- >>> isAscii "abc\NUL"
-- True
--
-- >>> isAscii "abcd€"
-- False
--
-- prop> isAscii t == all (< '\x80') t
--
-- @since 0.1
isAscii :: ShortText -> Bool
isAscii :: ShortText -> Bool
isAscii ShortText
st = (forall a. Eq a => a -> a -> Bool
/= CInt
0) forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO CInt
c_text_short_is_ascii (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
sz)
  where
    sz :: CSize
sz = ShortText -> CSize
toCSize ShortText
st

foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: ByteArray# -> CSize -> IO CInt

-- | \(\mathcal{O}(n)\) Test whether /all/ code points in 'ShortText' satisfy a predicate.
--
-- >>> all (const False) ""
-- True
--
-- >>> all (> 'c') "abcdabcd"
-- False
--
-- >>> all (/= 'c') "abdabd"
-- True
--
-- @since 0.1.2
all :: (Char -> Bool) -> ShortText -> Bool
all :: (Char -> Bool) -> ShortText -> Bool
all Char -> Bool
p ShortText
st = forall a. Maybe a -> Bool
isNothing ((Char -> Bool) -> ShortText -> B -> Maybe B
findOfs (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
st (Int -> B
B Int
0))

-- | \(\mathcal{O}(n)\) Return the left-most codepoint in 'ShortText' that satisfies the given predicate.
--
-- >>> find (> 'b') "abcdabcd"
-- Just 'c'
--
-- >>> find (> 'b') "ababab"
-- Nothing
--
-- @since 0.1.2
find :: (Char -> Bool) -> ShortText -> Maybe Char
find :: (Char -> Bool) -> ShortText -> Maybe Char
find Char -> Bool
p ShortText
st = B -> Maybe Char
go B
0
  where
    go :: B -> Maybe Char
go !B
ofs
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = forall a. Maybe a
Nothing
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq`
                        if Char -> Bool
p Char
c
                        then forall a. a -> Maybe a
Just Char
c
                        else B -> Maybe Char
go B
ofs'

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Return the index of the left-most codepoint in 'ShortText' that satisfies the given predicate.
--
-- >>> findIndex (> 'b') "abcdabcdef"
-- Just 2
--
-- >>> findIndex (> 'b') "ababab"
-- Nothing
--
-- prop> (indexMaybe t =<< findIndex p t) == find p t
--
-- @since 0.1.2
findIndex :: (Char -> Bool) -> ShortText -> Maybe Int
findIndex :: (Char -> Bool) -> ShortText -> Maybe Int
findIndex Char -> Bool
p ShortText
st = B -> Int -> Maybe Int
go B
0 Int
0
  where
    go :: B -> Int -> Maybe Int
go !B
ofs !Int
i
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = forall a. Maybe a
Nothing
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq`
                        if Char -> Bool
p Char
c
                        then forall a. a -> Maybe a
Just Int
i
                        else B -> Int -> Maybe Int
go B
ofs' (Int
iforall a. Num a => a -> a -> a
+Int
1)

    !sz :: B
sz = ShortText -> B
toB ShortText
st


-- | \(\mathcal{O}(n)\) Splits a string into components delimited by separators,
-- where the predicate returns True for a separator element.  The
-- resulting components do not contain the separators.  Two adjacent
-- separators result in an empty component in the output.  eg.
--
-- >>> split (=='a') "aabbaca"
-- ["","","bb","c",""]
--
-- >>> split (=='a') ""
-- [""]
--
-- prop> intercalate (singleton c) (split (== c) t) = t
--
-- __NOTE__: 'split' never returns an empty list to match the semantics of its counterpart from "Data.Text".
--
-- @since 0.1.3
split :: (Char -> Bool) -> ShortText -> [ShortText]
split :: (Char -> Bool) -> ShortText -> [ShortText]
split Char -> Bool
p ShortText
st0 = B -> [ShortText]
go B
0
  where
    go :: B -> [ShortText]
go !B
ofs0 = case (Char -> Bool) -> ShortText -> B -> Maybe (B, B)
findOfs' Char -> Bool
p ShortText
st0 B
ofs0 of
      Just (B
ofs1,B
ofs2) -> ShortText -> B -> B -> ShortText
slice ShortText
st0 B
ofs0 (B
ofs1forall a. Num a => a -> a -> a
-B
ofs0) forall a. a -> [a] -> [a]
: B -> [ShortText]
go B
ofs2
      Maybe (B, B)
Nothing
        | B
ofs0 forall a. Eq a => a -> a -> Bool
== B
0 -> ShortText
st0 forall a. a -> [a] -> [a]
: []
        | Bool
otherwise -> ShortText -> B -> B -> ShortText
slice ShortText
st0 B
ofs0 (B
maxOfsforall a. Num a => a -> a -> a
-B
ofs0) forall a. a -> [a] -> [a]
: []

    !maxOfs :: B
maxOfs = ShortText -> B
toB ShortText
st0

-- internal helper
{-# INLINE findOfs #-}
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs Char -> Bool
p ShortText
st = B -> Maybe B
go
  where
    go :: B -> Maybe B
    go :: B -> Maybe B
go !B
ofs | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz = forall a. Maybe a
Nothing
    go !B
ofs | Char -> Bool
p Char
c       = forall a. a -> Maybe a
Just B
ofs
            | Bool
otherwise = B -> Maybe B
go B
ofs'
      where
        (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs

    !sz :: B
sz = ShortText -> B
toB ShortText
st

{-# INLINE findOfs' #-}
findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B,B)
findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B, B)
findOfs' Char -> Bool
p ShortText
st = B -> Maybe (B, B)
go
  where
    go :: B -> Maybe (B,B)
    go :: B -> Maybe (B, B)
go !B
ofs | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz = forall a. Maybe a
Nothing
    go !B
ofs | Char -> Bool
p Char
c       = forall a. a -> Maybe a
Just (B
ofs,B
ofs')
            | Bool
otherwise = B -> Maybe (B, B)
go B
ofs'
      where
        (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs

    !sz :: B
sz = ShortText -> B
toB ShortText
st


{-# INLINE findOfsRev #-}
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev Char -> Bool
p ShortText
st = B -> Maybe B
go
  where
    go :: B -> Maybe B
go (B Int
0) = forall a. Maybe a
Nothing
    go !B
ofs
      | Char -> Bool
p (CP -> Char
cp2ch CP
cp) = forall a. a -> Maybe a
Just B
ofs
      | Bool
otherwise    = B -> Maybe B
go (B
ofsforall a. Num a => a -> a -> a
-CP -> B
cpLen CP
cp)
      where
        !cp :: CP
cp = ShortText -> B -> CP
readCodePointRev ShortText
st B
ofs

-- | \(\mathcal{O}(n)\) Split 'ShortText' into longest prefix satisfying the given predicate and the remaining suffix.
--
-- >>> span (< 'c') "abcdabcd"
-- ("ab","cdabcd")
--
-- prop> fst (span p t) <> snd (span p t) == t
--
-- @since 0.1.2
span :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
span :: (Char -> Bool) -> ShortText -> (ShortText, ShortText)
span Char -> Bool
p ShortText
st
  | Just B
ofs <- (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
st (Int -> B
B Int
0) = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  | Bool
otherwise = (ShortText
st,forall a. Monoid a => a
mempty)

-- | \(\mathcal{O}(n)\) Split 'ShortText' into longest suffix satisfying the given predicate and the preceding prefix.
--
-- >>> spanEnd (> 'c') "abcdabcd"
-- ("abcdabc","d")
--
-- prop> fst (spanEnd p t) <> snd (spanEnd p t) == t
--
-- @since 0.1.2
spanEnd :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
spanEnd :: (Char -> Bool) -> ShortText -> (ShortText, ShortText)
spanEnd Char -> Bool
p ShortText
st
  | Just B
ofs <- (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
st (ShortText -> B
toB ShortText
st) = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  | Bool
otherwise = (forall a. Monoid a => a
mempty,ShortText
st)

----------------------------------------------------------------------------

toCSize :: ShortText -> CSize
toCSize :: ShortText -> CSize
toCSize = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Int
BSS.length forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

toB :: ShortText -> B
toB :: ShortText -> B
toB = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Int
BSS.length forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

toLength :: ShortText -> Int
toLength :: ShortText -> Int
toLength ShortText
st = Int# -> Int
I# (ShortText -> Int#
toLength# ShortText
st)

toLength# :: ShortText -> Int#
toLength# :: ShortText -> Int#
toLength# ShortText
st = ByteArray# -> Int#
GHC.Exts.sizeofByteArray# (ShortText -> ByteArray#
toByteArray# ShortText
st)

toByteArray# :: ShortText -> ByteArray#
toByteArray# :: ShortText -> ByteArray#
toByteArray# (ShortText (BSSI.SBS ByteArray#
ba#)) = ByteArray#
ba#

-- | \(\mathcal{O}(0)\) Converts to UTF-8 encoded 'ShortByteString'
--
-- This operation has effectively no overhead, as it's currently merely a @newtype@-cast.
--
-- @since 0.1
toShortByteString :: ShortText -> ShortByteString
toShortByteString :: ShortText -> ShortByteString
toShortByteString (ShortText ShortByteString
b) = ShortByteString
b

-- | \(\mathcal{O}(n)\) Converts to UTF-8 encoded 'BS.ByteString'
--
-- @since 0.1
toByteString :: ShortText -> BS.ByteString
toByteString :: ShortText -> ByteString
toByteString = ShortByteString -> ByteString
BSS.fromShort forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

-- | Construct a 'BB.Builder' that encodes 'ShortText' as UTF-8.
--
-- @since 0.1
toBuilder :: ShortText -> BB.Builder
toBuilder :: ShortText -> Builder
toBuilder = ShortByteString -> Builder
BB.shortByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

-- | \(\mathcal{O}(n)\) Convert to 'String'
--
-- prop> (fromString . toString) t == t
--
-- __Note__: See documentation of 'fromString' for why @('toString' . 'fromString')@ is not an identity function.
--
-- @since 0.1
toString :: ShortText -> String
-- NOTE: impl below beats
--   toString = decodeStringShort' utf8 . toShortByteString
-- except for smallish strings
toString :: ShortText -> String
toString ShortText
st = B -> String
go B
0
  where
    go :: B -> String
go !B
ofs
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = []
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq` (Char
c forall a. a -> [a] -> [a]
: B -> String
go B
ofs')

    !sz :: B
sz = ShortText -> B
toB ShortText
st

----------------------------------------------------------------------------
-- Folds

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with
-- the binary operator and an initial in forward direction (i.e. from
-- left to right).
--
-- >>> foldl (\_ _ -> True) False ""
-- False
--
-- >>> foldl (\s c -> c : s) ['.'] "abcd"
-- "dcba."
--
-- @since 0.1.2
foldl :: (a -> Char -> a) -> a -> ShortText -> a
foldl :: forall a. (a -> Char -> a) -> a -> ShortText -> a
foldl a -> Char -> a
f a
z ShortText
st = B -> a -> a
go B
0 a
z
  where
    go :: B -> a -> a
go !B
ofs a
acc
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = a
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq` B -> a -> a
go B
ofs' (a -> Char -> a
f a
acc Char
c)

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with the binary operator.
--
-- >>> foldl1 max "abcdcba"
-- 'd'
--
-- >>> foldl1 const "abcd"
-- 'a'
--
-- >>> foldl1 (flip const) "abcd"
-- 'd'
--
-- __Note__: Will throw an 'error' exception if index is out of bounds.
--
-- @since 0.1.2
foldl1 :: (Char -> Char -> Char) -> ShortText -> Char
foldl1 :: (Char -> Char -> Char) -> ShortText -> Char
foldl1 Char -> Char -> Char
f ShortText
st
  | B
sz forall a. Eq a => a -> a -> Bool
== B
0    = forall a. (?callStack::CallStack) => String -> a
error String
"foldl1: empty ShortText"
  | Bool
otherwise  = B -> Char -> Char
go B
c0sz Char
c0
  where
    go :: B -> Char -> Char
go !B
ofs Char
acc
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = Char
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq` B -> Char -> Char
go B
ofs' (Char -> Char -> Char
f Char
acc Char
c)
    !sz :: B
sz = ShortText -> B
toB ShortText
st
    (Char
c0,B
c0sz) = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st (Int -> B
B Int
0)

-- | \(\mathcal{O}(n)\) Strict version of 'foldl'.
--
-- @since 0.1.2
foldl' :: (a -> Char -> a) -> a -> ShortText -> a
foldl' :: forall a. (a -> Char -> a) -> a -> ShortText -> a
foldl' a -> Char -> a
f !a
z ShortText
st = B -> a -> a
go B
0 a
z
  where
    go :: B -> a -> a
go !B
ofs !a
acc
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = a
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq` B -> a -> a
go B
ofs' (a -> Char -> a
f a
acc Char
c)

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Strict version of 'foldl1'.
--
-- @since 0.1.2
foldl1' :: (Char -> Char -> Char) -> ShortText -> Char
foldl1' :: (Char -> Char -> Char) -> ShortText -> Char
foldl1' Char -> Char -> Char
f ShortText
st
  | B
sz forall a. Eq a => a -> a -> Bool
== B
0    = forall a. (?callStack::CallStack) => String -> a
error String
"foldl1: empty ShortText"
  | Bool
otherwise  = B -> Char -> Char
go B
c0sz Char
c0
  where
    go :: B -> Char -> Char
go !B
ofs !Char
acc
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = Char
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq` B -> Char -> Char
go B
ofs' (Char -> Char -> Char
f Char
acc Char
c)
    !sz :: B
sz = ShortText -> B
toB ShortText
st
    (Char
c0,B
c0sz) = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st (Int -> B
B Int
0)

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with
-- the binary operator and an initial in reverse direction (i.e. from
-- right to left).
--
-- >>> foldr (\_ _ -> True) False ""
-- False
--
-- >>> foldr (:) ['.'] "abcd"
-- "abcd."
--
-- @since 0.1.2
foldr :: (Char -> a -> a) -> a -> ShortText -> a
foldr :: forall a. (Char -> a -> a) -> a -> ShortText -> a
foldr Char -> a -> a
f a
z ShortText
st = B -> a
go B
0
  where
    go :: B -> a
go !B
ofs
      | B
ofs forall a. Ord a => a -> a -> Bool
>= B
sz  = a
z
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq` Char -> a -> a
f Char
c (B -> a
go B
ofs')

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with the binary operator.
--
-- >>> foldr1 max "abcdcba"
-- 'd'
--
-- >>> foldr1 const "abcd"
-- 'a'
--
-- >>> foldr1 (flip const) "abcd"
-- 'd'
--
-- __Note__: Will throw an 'error' exception if index is out of bounds.
--
-- @since 0.1.2
foldr1 :: (Char -> Char -> Char) -> ShortText -> Char
foldr1 :: (Char -> Char -> Char) -> ShortText -> Char
foldr1 Char -> Char -> Char
f ShortText
st
  | B
sz forall a. Eq a => a -> a -> Bool
== B
0    = forall a. (?callStack::CallStack) => String -> a
error String
"foldr1: empty ShortText"
  | Bool
otherwise  = B -> Char
go B
0
  where
    go :: B -> Char
go !B
ofs = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
              in Char
c seq :: forall a b. a -> b -> b
`seq` B
ofs' seq :: forall a b. a -> b -> b
`seq`
                 (if B
ofs' forall a. Ord a => a -> a -> Bool
>= B
sz then Char
c else Char -> Char -> Char
f Char
c (B -> Char
go B
ofs'))

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Convert to 'T.Text'
--
-- prop> (fromText . toText) t == t
--
-- prop> (toText . fromText) t == t
--
-- This is \(\mathcal{O}(1)\) with @text-2@.
-- Previously it wasn't because 'T.Text' used UTF-16 as its internal representation.
--
-- @since 0.1
toText :: ShortText -> T.Text
#if MIN_VERSION_text(2,0,0)
toText (ShortText (BSSI.SBS ba)) = TI.Text (TA.ByteArray ba) 0 (I# (GHC.Exts.sizeofByteArray# ba))
#else
toText :: ShortText -> Text
toText = ByteString -> Text
T.decodeUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ByteString
toByteString
#endif

----

-- | \(\mathcal{O}(n)\) Construct/pack from 'String'
--
-- >>> fromString []
-- ""
--
-- >>> fromString ['a','b','c']
-- "abc"
--
-- >>> fromString ['\55295','\55296','\57343','\57344'] -- U+D7FF U+D800 U+DFFF U+E000
-- "\55295\65533\65533\57344"
--
-- __Note__: This function is total because it replaces the (invalid) code-points U+D800 through U+DFFF with the replacement character U+FFFD.
--
-- @since 0.1
fromString :: String -> ShortText
fromString :: String -> ShortText
fromString String
s = case String
s of
  []  -> forall a. Monoid a => a
mempty
  [Char
c] -> Char -> ShortText
singleton forall a b. (a -> b) -> a -> b
$ Char -> Char
r Char
c
  String
_   -> ShortByteString -> ShortText
ShortText forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextEncoding -> String -> ShortByteString
encodeStringShort TextEncoding
utf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
r forall a b. (a -> b) -> a -> b
$ String
s
  where
    r :: Char -> Char
r Char
c | forall i. (Num i, Bits i) => i -> Bool
isSurr (Char -> Int
ord Char
c) = Char
'\xFFFD'
        | Bool
otherwise      = Char
c

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from 'T.Text'
--
-- This is currently not \(\mathcal{O}(1)\) because currently 'T.Text' uses UTF-16 as its internal representation.
-- In the event that 'T.Text' will change its internal representation to UTF-8 this operation will become \(\mathcal{O}(1)\).
--
-- @since 0.1
fromText :: T.Text -> ShortText
#if MIN_VERSION_text(2,0,0)
fromText (TI.Text (TA.ByteArray ba) off len) =
    ShortText (BSSI.SBS (case sliceByteArray (TA.ByteArray ba) off len of TA.ByteArray ba' -> ba'))

sliceByteArray :: TA.Array -> Int -> Int -> TA.Array
sliceByteArray ta@(TA.ByteArray ba) 0 len
    | len == I# (GHC.Exts.sizeofByteArray# ba)
    = ta
sliceByteArray ta off len = TA.run $ do
    ma <- TA.new len
    TA.copyI len ma 0 ta off
    return ma

#else
fromText :: Text -> ShortText
fromText = ByteString -> ShortText
fromByteStringUnsafe forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8
#endif

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from UTF-8 encoded 'ShortByteString'
--
-- This operation doesn't copy the input 'ShortByteString' but it
-- cannot be \(\mathcal{O}(1)\) because we need to validate the UTF-8 encoding.
--
-- Returns 'Nothing' in case of invalid UTF-8 encoding.
--
-- >>> fromShortByteString "\x00\x38\xF0\x90\x8C\x9A" -- U+00 U+38 U+1031A
-- Just "\NUL8\66330"
--
-- >>> fromShortByteString "\xC0\x80" -- invalid denormalised U+00
-- Nothing
--
-- >>> fromShortByteString "\xED\xA0\x80" -- U+D800 (non-scalar code-point)
-- Nothing
--
-- >>> fromShortByteString "\xF4\x8f\xbf\xbf" -- U+10FFFF
-- Just "\1114111"
--
-- >>> fromShortByteString "\xF4\x90\x80\x80" -- U+110000 (invalid)
-- Nothing
--
-- prop> fromShortByteString (toShortByteString t) == Just t
--
-- @since 0.1
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString ShortByteString
sbs
  | ShortText -> Bool
isValidUtf8 ShortText
st  = forall a. a -> Maybe a
Just ShortText
st
  | Bool
otherwise       = forall a. Maybe a
Nothing
  where
    st :: ShortText
st = ShortByteString -> ShortText
ShortText ShortByteString
sbs

-- | \(\mathcal{O}(0)\) Construct 'ShortText' from UTF-8 encoded 'ShortByteString'
--
-- This operation has effectively no overhead, as it's currently merely a @newtype@-cast.
--
-- __WARNING__: Unlike the safe 'fromShortByteString' conversion, this
-- conversion is /unsafe/ as it doesn't validate the well-formedness of the
-- UTF-8 encoding.
--
-- @since 0.1.1
fromShortByteStringUnsafe :: ShortByteString -> ShortText
fromShortByteStringUnsafe :: ShortByteString -> ShortText
fromShortByteStringUnsafe = ShortByteString -> ShortText
ShortText

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from UTF-8 encoded 'BS.ByteString'
--
-- 'fromByteString' accepts (or rejects) the same input data as 'fromShortByteString'.
--
-- Returns 'Nothing' in case of invalid UTF-8 encoding.
--
-- @since 0.1
fromByteString :: BS.ByteString -> Maybe ShortText
fromByteString :: ByteString -> Maybe ShortText
fromByteString = ShortByteString -> Maybe ShortText
fromShortByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
BSS.toShort

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from UTF-8 encoded 'BS.ByteString'
--
-- This operation is \(\mathcal{O}(n)\) because the 'BS.ByteString' needs to be
-- copied into an unpinned 'ByteArray#'.
--
-- __WARNING__: Unlike the safe 'fromByteString' conversion, this
-- conversion is /unsafe/ as it doesn't validate the well-formedness of the
-- UTF-8 encoding.
--
-- @since 0.1.1
fromByteStringUnsafe :: BS.ByteString -> ShortText
fromByteStringUnsafe :: ByteString -> ShortText
fromByteStringUnsafe = ShortByteString -> ShortText
ShortText forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
BSS.toShort

----------------------------------------------------------------------------

encodeString :: TextEncoding -> String -> BS.ByteString
encodeString :: TextEncoding -> String -> ByteString
encodeString TextEncoding
te String
str = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ forall a. TextEncoding -> String -> (CStringLen -> IO a) -> IO a
GHC.withCStringLen TextEncoding
te String
str CStringLen -> IO ByteString
BS.packCStringLen

-- decodeString :: TextEncoding -> BS.ByteString -> Maybe String
-- decodeString te bs = cvtEx $ unsafePerformIO $ try $ BS.useAsCStringLen bs (GHC.peekCStringLen te)
--   where
--     cvtEx :: Either IOException a -> Maybe a
--     cvtEx = either (const Nothing) Just

decodeString' :: TextEncoding -> BS.ByteString -> String
decodeString' :: TextEncoding -> ByteString -> String
decodeString' TextEncoding
te ByteString
bs = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
bs (TextEncoding -> CStringLen -> IO String
GHC.peekCStringLen TextEncoding
te)

decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' TextEncoding
te = TextEncoding -> ByteString -> String
decodeString' TextEncoding
te forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> ByteString
BSS.fromShort

encodeStringShort :: TextEncoding -> String -> BSS.ShortByteString
encodeStringShort :: TextEncoding -> String -> ShortByteString
encodeStringShort TextEncoding
te = ByteString -> ShortByteString
BSS.toShort forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextEncoding -> String -> ByteString
encodeString TextEncoding
te

-- isValidUtf8' :: ShortText -> Int
-- isValidUtf8' st = fromIntegral $ unsafeDupablePerformIO (c_text_short_is_valid_utf8 (toByteArray# st) (toCSize st))

isValidUtf8 :: ShortText -> Bool
isValidUtf8 :: ShortText -> Bool
isValidUtf8 ShortText
st = (forall a. Eq a => a -> a -> Bool
==CInt
0) forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO CInt
c_text_short_is_valid_utf8 (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st))

type CCodePoint = Word

foreign import ccall unsafe "hs_text_short_is_valid_utf8" c_text_short_is_valid_utf8 :: ByteArray# -> CSize -> IO CInt

foreign import ccall unsafe "hs_text_short_index_cp" c_text_short_index :: ByteArray# -> CSize -> CSize -> IO CCodePoint

-- | \(\mathcal{O}(n)\) Lookup /i/-th code-point in 'ShortText'.
--
-- Returns 'Nothing' if out of bounds.
--
-- prop> indexMaybe (singleton c) 0 == Just c
--
-- prop> indexMaybe t 0 == fmap fst (uncons t)
--
-- prop> indexMaybe mempty i == Nothing
--
-- @since 0.1.2
indexMaybe :: ShortText -> Int -> Maybe Char
indexMaybe :: ShortText -> Int -> Maybe Char
indexMaybe ShortText
st Int
i
  | Int
i forall a. Ord a => a -> a -> Bool
< Int
0      = forall a. Maybe a
Nothing
  | Bool
otherwise  = CP -> Maybe Char
cp2chSafe CP
cp
  where
    cp :: CP
cp = Word -> CP
CP forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO Word
c_text_short_index (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st) (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))

-- | \(\mathcal{O}(n)\) Lookup /i/-th code-point from the end of 'ShortText'.
--
-- Returns 'Nothing' if out of bounds.
--
-- prop> indexEndMaybe (singleton c) 0 == Just c
--
-- prop> indexEndMaybe t 0 == fmap snd (unsnoc t)
--
-- prop> indexEndMaybe mempty i == Nothing
--
-- @since 0.1.2
indexEndMaybe :: ShortText -> Int -> Maybe Char
indexEndMaybe :: ShortText -> Int -> Maybe Char
indexEndMaybe ShortText
st Int
i
  | Int
i forall a. Ord a => a -> a -> Bool
< Int
0      = forall a. Maybe a
Nothing
  | Bool
otherwise  = CP -> Maybe Char
cp2chSafe CP
cp
  where
    cp :: CP
cp = Word -> CP
CP forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO Word
c_text_short_index_rev (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st) (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))

foreign import ccall unsafe "hs_text_short_index_cp_rev" c_text_short_index_rev :: ByteArray# -> CSize -> CSize -> IO CCodePoint


-- | \(\mathcal{O}(n)\) Split 'ShortText' into two halves.
--
-- @'splitAt' n t@ returns a pair of 'ShortText' with the following properties:
--
-- prop> length (fst (splitAt n t)) == min (length t) (max 0 n)
--
-- prop> fst (splitAt n t) <> snd (splitAt n t) == t
--
-- >>> splitAt 2 "abcdef"
-- ("ab","cdef")
--
-- >>> splitAt 10 "abcdef"
-- ("abcdef","")
--
-- >>> splitAt (-1) "abcdef"
-- ("","abcdef")
--
-- @since 0.1.2
splitAt :: Int -> ShortText -> (ShortText,ShortText)
splitAt :: Int -> ShortText -> (ShortText, ShortText)
splitAt Int
i ShortText
st
  | Int
i forall a. Ord a => a -> a -> Bool
<= Int
0    = (forall a. Monoid a => a
mempty,ShortText
st)
  | Bool
otherwise = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  where
    ofs :: B
ofs   = CSize -> B
csizeToB forall a b. (a -> b) -> a -> b
$
            forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO CSize
c_text_short_index_ofs (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
stsz (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))
    stsz :: CSize
stsz  = ShortText -> CSize
toCSize ShortText
st

-- | \(\mathcal{O}(n)\) Split 'ShortText' into two halves.
--
-- @'splitAtEnd' n t@ returns a pair of 'ShortText' with the following properties:
--
-- prop> length (snd (splitAtEnd n t)) == min (length t) (max 0 n)
--
-- prop> fst (splitAtEnd n t) <> snd (splitAtEnd n t) == t
--
-- prop> splitAtEnd n t == splitAt (length t - n) t
--
-- >>> splitAtEnd 2 "abcdef"
-- ("abcd","ef")
--
-- >>> splitAtEnd 10 "abcdef"
-- ("","abcdef")
--
-- >>> splitAtEnd (-1) "abcdef"
-- ("abcdef","")
--
-- @since 0.1.2
splitAtEnd :: Int -> ShortText -> (ShortText,ShortText)
splitAtEnd :: Int -> ShortText -> (ShortText, ShortText)
splitAtEnd Int
i ShortText
st
  | Int
i forall a. Ord a => a -> a -> Bool
<= Int
0      = (ShortText
st,forall a. Monoid a => a
mempty)
  | B
ofs forall a. Ord a => a -> a -> Bool
>= B
stsz = (forall a. Monoid a => a
mempty,ShortText
st)
  | Bool
otherwise   = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  where
    ofs :: B
ofs   = CSize -> B
csizeToB forall a b. (a -> b) -> a -> b
$
            forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO CSize
c_text_short_index_ofs_rev (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st) (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
iforall a. Num a => a -> a -> a
-Int
1)))
    stsz :: B
stsz  = ShortText -> B
toB ShortText
st

{-# INLINE splitAtOfs #-}
splitAtOfs :: B -> ShortText -> (ShortText,ShortText)
splitAtOfs :: B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  | B
ofs  forall a. Eq a => a -> a -> Bool
== B
0    = (forall a. Monoid a => a
mempty,ShortText
st)
  | B
ofs  forall a. Ord a => a -> a -> Bool
>= B
stsz = (ShortText
st,forall a. Monoid a => a
mempty)
  | Bool
otherwise    = (ShortText -> B -> B -> ShortText
slice ShortText
st B
0 B
ofs, ShortText -> B -> B -> ShortText
slice ShortText
st B
ofs (B
stszforall a. Num a => a -> a -> a
-B
ofs))
  where
    !stsz :: B
stsz  = ShortText -> B
toB ShortText
st

foreign import ccall unsafe "hs_text_short_index_ofs" c_text_short_index_ofs :: ByteArray# -> CSize -> CSize -> IO CSize

foreign import ccall unsafe "hs_text_short_index_ofs_rev" c_text_short_index_ofs_rev :: ByteArray# -> CSize -> CSize -> IO CSize


-- | \(\mathcal{O}(n)\) Inverse operation to 'cons'
--
-- Returns 'Nothing' for empty input 'ShortText'.
--
-- prop> uncons (cons c t) == Just (c,t)
--
-- >>> uncons ""
-- Nothing
--
-- >>> uncons "fmap"
-- Just ('f',"map")
--
-- @since 0.1.2
uncons :: ShortText -> Maybe (Char,ShortText)
uncons :: ShortText -> Maybe (Char, ShortText)
uncons ShortText
st
  | ShortText -> Bool
null ShortText
st    = forall a. Maybe a
Nothing
  | B
len2 forall a. Eq a => a -> a -> Bool
== B
0  = forall a. a -> Maybe a
Just (Char
c0, forall a. Monoid a => a
mempty)
  | Bool
otherwise  = forall a. a -> Maybe a
Just (Char
c0, ShortText -> B -> B -> ShortText
slice ShortText
st B
ofs B
len2)
  where
    c0 :: Char
c0  = CP -> Char
cp2ch CP
cp0
    cp0 :: CP
cp0 = ShortText -> B -> CP
readCodePoint ShortText
st B
0
    ofs :: B
ofs = CP -> B
cpLen CP
cp0
    len2 :: B
len2 = ShortText -> B
toB ShortText
st forall a. Num a => a -> a -> a
- B
ofs

-- | \(\mathcal{O}(n)\) Inverse operation to 'snoc'
--
-- Returns 'Nothing' for empty input 'ShortText'.
--
-- prop> unsnoc (snoc t c) == Just (t,c)
--
-- >>> unsnoc ""
-- Nothing
--
-- >>> unsnoc "fmap"
-- Just ("fma",'p')
--
-- @since 0.1.2
unsnoc :: ShortText -> Maybe (ShortText,Char)
unsnoc :: ShortText -> Maybe (ShortText, Char)
unsnoc ShortText
st
  | ShortText -> Bool
null ShortText
st    = forall a. Maybe a
Nothing
  | B
len1 forall a. Eq a => a -> a -> Bool
== B
0  = forall a. a -> Maybe a
Just (forall a. Monoid a => a
mempty, Char
c0)
  | Bool
otherwise  = forall a. a -> Maybe a
Just (ShortText -> B -> B -> ShortText
slice ShortText
st B
0 B
len1, Char
c0)
  where
    c0 :: Char
c0  = CP -> Char
cp2ch CP
cp0
    cp0 :: CP
cp0 = ShortText -> B -> CP
readCodePointRev ShortText
st B
stsz
    stsz :: B
stsz = ShortText -> B
toB ShortText
st
    len1 :: B
len1 = B
stsz forall a. Num a => a -> a -> a
- CP -> B
cpLen CP
cp0

-- | \(\mathcal{O}(n)\) Tests whether the first 'ShortText' is a prefix of the second 'ShortText'
--
-- >>> isPrefixOf "ab" "abcdef"
-- True
--
-- >>> isPrefixOf "ac" "abcdef"
-- False
--
-- prop> isPrefixOf "" t == True
--
-- prop> isPrefixOf t t == True
--
-- @since 0.1.2
isPrefixOf :: ShortText -> ShortText -> Bool
isPrefixOf :: ShortText -> ShortText -> Bool
isPrefixOf ShortText
x ShortText
y
  | Int
lx forall a. Ord a => a -> a -> Bool
> Int
ly = Bool
False
  | Int
lx forall a. Eq a => a -> a -> Bool
== Int
0 = Bool
True
  | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# (ShortText -> ByteArray#
toByteArray# ShortText
x) Int#
0# (ShortText -> ByteArray#
toByteArray# ShortText
y) Int#
0# Int#
n# of
                  Int#
0# -> Bool
True
                  Int#
_  -> Bool
False
  where
    !lx :: Int
lx@(I# Int#
n#) = ShortText -> Int
toLength ShortText
x
    !ly :: Int
ly = ShortText -> Int
toLength ShortText
y

-- | \(\mathcal{O}(n)\) Strip prefix from second 'ShortText' argument.
--
-- Returns 'Nothing' if first argument is not a prefix of the second argument.
--
-- >>> stripPrefix "text-" "text-short"
-- Just "short"
--
-- >>> stripPrefix "test-" "text-short"
-- Nothing
--
-- @since 0.1.2
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix ShortText
pfx ShortText
t
  | ShortText -> ShortText -> Bool
isPrefixOf ShortText
pfx ShortText
t = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$! forall a b. (a, b) -> b
snd (B -> ShortText -> (ShortText, ShortText)
splitAtOfs (ShortText -> B
toB ShortText
pfx) ShortText
t)
  | Bool
otherwise        = forall a. Maybe a
Nothing

-- | \(\mathcal{O}(n)\) Tests whether the first 'ShortText' is a suffix of the second 'ShortText'
--
-- >>> isSuffixOf "ef" "abcdef"
-- True
--
-- >>> isPrefixOf "df" "abcdef"
-- False
--
-- prop> isSuffixOf "" t == True
--
-- prop> isSuffixOf t t == True
--
-- @since 0.1.2
isSuffixOf :: ShortText -> ShortText -> Bool
isSuffixOf :: ShortText -> ShortText -> Bool
isSuffixOf ShortText
x ShortText
y
  | Int
lx forall a. Ord a => a -> a -> Bool
> Int
ly = Bool
False
  | Int
lx forall a. Eq a => a -> a -> Bool
== Int
0 = Bool
True
  | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# (ShortText -> ByteArray#
toByteArray# ShortText
x) Int#
0# (ShortText -> ByteArray#
toByteArray# ShortText
y) Int#
ofs2# Int#
n# of
                  Int#
0# -> Bool
True
                  Int#
_  -> Bool
False
  where
    !(I# Int#
ofs2#) = Int
ly forall a. Num a => a -> a -> a
- Int
lx
    !lx :: Int
lx@(I# Int#
n#) = ShortText -> Int
toLength ShortText
x
    !ly :: Int
ly = ShortText -> Int
toLength ShortText
y

-- | \(\mathcal{O}(n)\) Strip suffix from second 'ShortText' argument.
--
-- Returns 'Nothing' if first argument is not a suffix of the second argument.
--
-- >>> stripSuffix "-short" "text-short"
-- Just "text"
--
-- >>> stripSuffix "-utf8" "text-short"
-- Nothing
--
-- @since 0.1.2
stripSuffix :: ShortText -> ShortText -> Maybe ShortText
stripSuffix :: ShortText -> ShortText -> Maybe ShortText
stripSuffix ShortText
sfx ShortText
t
  | ShortText -> ShortText -> Bool
isSuffixOf ShortText
sfx ShortText
t = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$! forall a b. (a, b) -> a
fst (B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
pfxLen ShortText
t)
  | Bool
otherwise        = forall a. Maybe a
Nothing
  where
    pfxLen :: B
pfxLen = ShortText -> B
toB ShortText
t forall a. Num a => a -> a -> a
- ShortText -> B
toB ShortText
sfx

----------------------------------------------------------------------------

-- | \(\mathcal{O}(n)\) Insert character between characters of 'ShortText'.
--
-- >>> intersperse '*' "_"
-- "_"
--
-- >>> intersperse '*' "MASH"
-- "M*A*S*H"
--
-- @since 0.1.2
intersperse :: Char -> ShortText -> ShortText
intersperse :: Char -> ShortText -> ShortText
intersperse Char
c ShortText
st
  | ShortText -> Bool
null ShortText
st = forall a. Monoid a => a
mempty
  | Int
sn forall a. Eq a => a -> a -> Bool
== Int
1 = ShortText
st
  | Bool
otherwise = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
newsz forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> do
      let !cp0 :: CP
cp0 = ShortText -> B -> CP
readCodePoint ShortText
st B
0
          !cp0sz :: B
cp0sz = CP -> B
cpLen CP
cp0
      forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cp0sz MBA s
mba B
0 CP
cp0
      forall s. MBA s -> Int -> B -> B -> ST s ()
go MBA s
mba (Int
sn forall a. Num a => a -> a -> a
- Int
1) B
cp0sz B
cp0sz
  where
    newsz :: B
newsz = B
ssz forall a. Num a => a -> a -> a
+ ((Int
snforall a. Num a => a -> a -> a
-Int
1) Int -> B -> B
`mulB` B
csz)
    ssz :: B
ssz = ShortText -> B
toB ShortText
st
    sn :: Int
sn  = ShortText -> Int
length ShortText
st
    csz :: B
csz = CP -> B
cpLen CP
cp
    cp :: CP
cp  = Char -> CP
ch2cp Char
c

    go :: MBA s -> Int -> B -> B -> ST s ()
    go :: forall s. MBA s -> Int -> B -> B -> ST s ()
go MBA s
_   Int
0 !B
_  !B
_   = forall (m :: * -> *) a. Monad m => a -> m a
return ()
    go MBA s
mba Int
n B
ofs B
ofs2 = do
      let !cp1 :: CP
cp1 = ShortText -> B -> CP
readCodePoint ShortText
st B
ofs2
          !cp1sz :: B
cp1sz = CP -> B
cpLen CP
cp1
      forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
csz   MBA s
mba B
ofs CP
cp
      forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cp1sz MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
csz) CP
cp1
      forall s. MBA s -> Int -> B -> B -> ST s ()
go MBA s
mba (Int
nforall a. Num a => a -> a -> a
-Int
1) (B
ofsforall a. Num a => a -> a -> a
+B
cszforall a. Num a => a -> a -> a
+B
cp1sz) (B
ofs2forall a. Num a => a -> a -> a
+B
cp1sz)

-- | \(\mathcal{O}(n)\) Insert 'ShortText' inbetween list of 'ShortText's.
--
-- >>> intercalate ", " []
-- ""
--
-- >>> intercalate ", " ["foo"]
-- "foo"
--
-- >>> intercalate ", " ["foo","bar","doo"]
-- "foo, bar, doo"
--
-- prop> intercalate "" ts == concat ts
--
-- @since 0.1.2
intercalate :: ShortText -> [ShortText] -> ShortText
intercalate :: ShortText -> [ShortText] -> ShortText
intercalate ShortText
_ []  = forall a. Monoid a => a
mempty
intercalate ShortText
_ [ShortText
t] = ShortText
t
intercalate ShortText
sep [ShortText]
ts
  | ShortText -> Bool
null ShortText
sep   = forall a. Monoid a => [a] -> a
mconcat [ShortText]
ts
  | Bool
otherwise  = forall a. Monoid a => [a] -> a
mconcat (forall a. a -> [a] -> [a]
List.intersperse ShortText
sep [ShortText]
ts)

-- | \(\mathcal{O}(n*m)\) Replicate a 'ShortText'.
--
-- A repetition count smaller than 1 results in an empty string result.
--
-- >>> replicate 3 "jobs!"
-- "jobs!jobs!jobs!"
--
-- >>> replicate 10000 ""
-- ""
--
-- >>> replicate 0 "nothing"
-- ""
--
-- prop> length (replicate n t) == max 0 n * length t
--
-- @since 0.1.2
replicate :: Int -> ShortText -> ShortText
replicate :: Int -> ShortText -> ShortText
replicate Int
n0 ShortText
t
  | Int
n0 forall a. Ord a => a -> a -> Bool
< Int
1     = forall a. Monoid a => a
mempty
  | ShortText -> Bool
null ShortText
t    = forall a. Monoid a => a
mempty
  | Bool
otherwise = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (Int
n0 Int -> B -> B
`mulB` B
sz) (forall s. Int -> MBA s -> ST s ()
go Int
0)
  where
    go :: Int -> MBA s -> ST s ()
    go :: forall s. Int -> MBA s -> ST s ()
go Int
j MBA s
mba
      | Int
j forall a. Eq a => a -> a -> Bool
== Int
n0    = forall (m :: * -> *) a. Monad m => a -> m a
return ()
      | Bool
otherwise  = do
          forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
t B
0 MBA s
mba (Int
j Int -> B -> B
`mulB` B
sz) B
sz
          forall s. Int -> MBA s -> ST s ()
go (Int
jforall a. Num a => a -> a -> a
+Int
1) MBA s
mba

    sz :: B
sz = ShortText -> B
toB ShortText
t

-- | \(\mathcal{O}(n)\) Reverse characters in 'ShortText'.
--
-- >>> reverse "star live desserts"
-- "stressed evil rats"
--
-- prop> reverse (singleton c) == singleton c
--
-- prop> reverse (reverse t) == t
--
-- @since 0.1.2
reverse :: ShortText -> ShortText
reverse :: ShortText -> ShortText
reverse ShortText
st
  | ShortText -> Bool
null ShortText
st   = forall a. Monoid a => a
mempty
  | Int
sn forall a. Eq a => a -> a -> Bool
== Int
1   = ShortText
st
  | Bool
otherwise = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
sz forall a b. (a -> b) -> a -> b
$ forall s. Int -> B -> MBA s -> ST s ()
go Int
sn B
0
  where
    sz :: B
sz = ShortText -> B
toB ShortText
st
    sn :: Int
sn = ShortText -> Int
length ShortText
st

    go :: Int -> B -> MBA s -> ST s ()
    go :: forall s. Int -> B -> MBA s -> ST s ()
go Int
0 !B
_  MBA s
_   = forall (m :: * -> *) a. Monad m => a -> m a
return ()
    go Int
i B
ofs MBA s
mba = do
      let !cp :: CP
cp   = ShortText -> B -> CP
readCodePoint ShortText
st B
ofs
          !cpsz :: B
cpsz = CP -> B
cpLen CP
cp
          !ofs' :: B
ofs' = B
ofsforall a. Num a => a -> a -> a
+B
cpsz
      forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cpsz MBA s
mba (B
sz forall a. Num a => a -> a -> a
- B
ofs') CP
cp
      forall s. Int -> B -> MBA s -> ST s ()
go (Int
iforall a. Num a => a -> a -> a
-Int
1) B
ofs' MBA s
mba


-- | \(\mathcal{O}(n)\) Remove characters from 'ShortText' which don't satisfy given predicate.
--
-- >>> filter (`notElem` ['a','e','i','o','u']) "You don't need vowels to convey information!"
-- "Y dn't nd vwls t cnvy nfrmtn!"
--
-- prop> filter (const False) t == ""
--
-- prop> filter (const True) t == t
--
-- prop> length (filter p t) <= length t
--
-- prop> filter p t == pack [ c | c <- unpack t, p c ]
--
-- @since 0.1.2
filter :: (Char -> Bool) -> ShortText -> ShortText
filter :: (Char -> Bool) -> ShortText -> ShortText
filter Char -> Bool
p ShortText
t
  = case (Maybe B
mofs1,Maybe B
mofs2) of
      (Maybe B
Nothing,   Maybe B
_)       -> ShortText
t -- no non-accepted characters found
      (Just B
0,    Maybe B
Nothing) -> forall a. Monoid a => a
mempty -- no accepted characters found
      (Just B
ofs1, Maybe B
Nothing) -> ShortText -> B -> B -> ShortText
slice ShortText
t B
0 B
ofs1 -- only prefix accepted
      (Just B
ofs1, Just B
ofs2) -> B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink (B
t0szforall a. Num a => a -> a -> a
-(B
ofs2forall a. Num a => a -> a -> a
-B
ofs1)) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> do
        -- copy accepted prefix
        forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
t B
0 MBA s
mba B
0 B
ofs1
        -- [ofs1 .. ofs2) are a non-accepted region
        -- filter rest after ofs2
        B
t1sz <- forall s. MBA s -> B -> B -> ST s B
go MBA s
mba B
ofs2 B
ofs1
        forall (m :: * -> *) a. Monad m => a -> m a
return B
t1sz
  where
    mofs1 :: Maybe B
mofs1 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
t (Int -> B
B Int
0) -- first non-accepted Char
    mofs2 :: Maybe B
mofs2 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs Char -> Bool
p ShortText
t (forall a. a -> Maybe a -> a
fromMaybe (Int -> B
B Int
0) Maybe B
mofs1) -- first accepted Char

    t0sz :: B
t0sz = ShortText -> B
toB ShortText
t

    go :: MBA s -> B -> B -> ST s B
    go :: forall s. MBA s -> B -> B -> ST s B
go MBA s
mba !B
t0ofs !B
t1ofs
      | B
t0ofs forall a. Ord a => a -> a -> Bool
>= B
t0sz = forall (m :: * -> *) a. Monad m => a -> m a
return B
t1ofs
      | Bool
otherwise = let !cp :: CP
cp = ShortText -> B -> CP
readCodePoint ShortText
t B
t0ofs
                        !cpsz :: B
cpsz = CP -> B
cpLen CP
cp
                    in if Char -> Bool
p (CP -> Char
cp2ch CP
cp)
                       then forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cpsz MBA s
mba B
t1ofs CP
cp forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                            forall s. MBA s -> B -> B -> ST s B
go MBA s
mba (B
t0ofsforall a. Num a => a -> a -> a
+B
cpsz) (B
t1ofsforall a. Num a => a -> a -> a
+B
cpsz)
                       else forall s. MBA s -> B -> B -> ST s B
go MBA s
mba (B
t0ofsforall a. Num a => a -> a -> a
+B
cpsz) B
t1ofs -- skip code-point

-- | \(\mathcal{O}(n)\) Strip characters from the beginning end and of 'ShortText' which satisfy given predicate.
--
-- >>> dropAround (== ' ') "   white   space   "
-- "white   space"
--
-- >>> dropAround (> 'a') "bcdefghi"
-- ""
--
-- @since 0.1.2
dropAround :: (Char -> Bool) -> ShortText -> ShortText
dropAround :: (Char -> Bool) -> ShortText -> ShortText
dropAround Char -> Bool
p ShortText
t0 = case (Maybe B
mofs1,Maybe B
mofs2) of
                    (Maybe B
Nothing,Maybe B
_) -> forall a. Monoid a => a
mempty
                    (Just B
ofs1,Just B
ofs2)
                      | B
ofs1 forall a. Eq a => a -> a -> Bool
== B
0, B
ofs2 forall a. Eq a => a -> a -> Bool
== B
t0sz  -> ShortText
t0
                      | B
ofs1 forall a. Ord a => a -> a -> Bool
< B
ofs2  -> B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
ofs2forall a. Num a => a -> a -> a
-B
ofs1) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> do
                          forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
t0 B
ofs1 MBA s
mba (Int -> B
B Int
0) (B
ofs2forall a. Num a => a -> a -> a
-B
ofs1)
                    (Maybe B
_,Maybe B
_) -> forall a. (?callStack::CallStack) => String -> a
error String
"dropAround: the impossible happened"
  where
    mofs1 :: Maybe B
mofs1 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs    (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
t0 (Int -> B
B Int
0)
    mofs2 :: Maybe B
mofs2 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
t0 B
t0sz
    t0sz :: B
t0sz = ShortText -> B
toB ShortText
t0

----------------------------------------------------------------------------

-- | Construct a new 'ShortText' from an existing one by slicing
--
-- NB: The 'CSize' arguments refer to byte-offsets
slice :: ShortText -> B -> B -> ShortText
slice :: ShortText -> B -> B -> ShortText
slice ShortText
st B
ofs B
len
  | B
ofs forall a. Ord a => a -> a -> Bool
< B
0    = forall a. (?callStack::CallStack) => String -> a
error String
"invalid offset"
  | B
len forall a. Ord a => a -> a -> Bool
< B
0    = forall a. (?callStack::CallStack) => String -> a
error String
"invalid length"
  | B
len' forall a. Eq a => a -> a -> Bool
== B
0  = forall a. Monoid a => a
mempty
  | Bool
otherwise  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
len' forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
st B
ofs' MBA s
mba B
0 B
len'
  where
    len0 :: B
len0 = ShortText -> B
toB ShortText
st
    len' :: B
len' = forall a. Ord a => a -> a -> a
max B
0 (forall a. Ord a => a -> a -> a
min B
len (B
len0forall a. Num a => a -> a -> a
-B
ofs))
    ofs' :: B
ofs' = forall a. Ord a => a -> a -> a
max B
0 B
ofs

----------------------------------------------------------------------------
-- low-level MutableByteArray# helpers

-- | Byte offset (or size) in bytes
--
-- This currently wraps an 'Int' because this is what GHC's primops
-- currently use for byte offsets/sizes.
newtype B = B { B -> Int
unB :: Int }
          deriving (Eq B
B -> B -> Bool
B -> B -> Ordering
B -> B -> B
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: B -> B -> B
$cmin :: B -> B -> B
max :: B -> B -> B
$cmax :: B -> B -> B
>= :: B -> B -> Bool
$c>= :: B -> B -> Bool
> :: B -> B -> Bool
$c> :: B -> B -> Bool
<= :: B -> B -> Bool
$c<= :: B -> B -> Bool
< :: B -> B -> Bool
$c< :: B -> B -> Bool
compare :: B -> B -> Ordering
$ccompare :: B -> B -> Ordering
Ord,B -> B -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: B -> B -> Bool
$c/= :: B -> B -> Bool
== :: B -> B -> Bool
$c== :: B -> B -> Bool
Eq,Integer -> B
B -> B
B -> B -> B
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> B
$cfromInteger :: Integer -> B
signum :: B -> B
$csignum :: B -> B
abs :: B -> B
$cabs :: B -> B
negate :: B -> B
$cnegate :: B -> B
* :: B -> B -> B
$c* :: B -> B -> B
- :: B -> B -> B
$c- :: B -> B -> B
+ :: B -> B -> B
$c+ :: B -> B -> B
Num)

{- TODO: introduce operators for 'B' to avoid 'Num' -}

mulB :: Int -> B -> B
mulB :: Int -> B -> B
mulB Int
n (B Int
b) = Int -> B
B (Int
nforall a. Num a => a -> a -> a
*Int
b)

csizeFromB :: B -> CSize
csizeFromB :: B -> CSize
csizeFromB = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. B -> Int
unB

csizeToB :: CSize -> B
csizeToB :: CSize -> B
csizeToB = Int -> B
B forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral

data MBA s = MBA# { forall s. MBA s -> MutableByteArray# s
unMBA# :: MutableByteArray# s }

{-# INLINE create #-}
create :: B -> (forall s. MBA s -> ST s ()) -> ShortText
create :: B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
n forall s. MBA s -> ST s ()
go = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
  MBA s
mba <- forall s. B -> ST s (MBA s)
newByteArray B
n
  forall s. MBA s -> ST s ()
go MBA s
mba
  forall s. MBA s -> ST s ShortText
unsafeFreeze MBA s
mba

{-# INLINE createShrink #-}
createShrink :: B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink :: B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink B
n forall s. MBA s -> ST s B
go = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
  MBA s
mba <- forall s. B -> ST s (MBA s)
newByteArray B
n
  B
n' <- forall s. MBA s -> ST s B
go MBA s
mba
  if B
n' forall a. Ord a => a -> a -> Bool
< B
n
    then forall s. MBA s -> B -> ST s ShortText
unsafeFreezeShrink MBA s
mba B
n'
    else forall s. MBA s -> ST s ShortText
unsafeFreeze MBA s
mba

{-# INLINE unsafeFreeze #-}
unsafeFreeze :: MBA s -> ST s ShortText
unsafeFreeze :: forall s. MBA s -> ST s ShortText
unsafeFreeze (MBA# MutableByteArray# s
mba#)
  = forall s a. STRep s a -> ST s a
ST forall a b. (a -> b) -> a -> b
$ \State# s
s -> case forall d.
MutableByteArray# d -> State# d -> (# State# d, ByteArray# #)
GHC.Exts.unsafeFreezeByteArray# MutableByteArray# s
mba# State# s
s of
                 (# State# s
s', ByteArray#
ba# #) -> (# State# s
s', ShortByteString -> ShortText
ShortText (ByteArray# -> ShortByteString
BSSI.SBS ByteArray#
ba#) #)

{-# INLINE copyByteArray #-}
copyByteArray :: ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray :: forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray (ShortText (BSSI.SBS ByteArray#
src#)) (B (I# Int#
src_off#)) (MBA# MutableByteArray# s
dst#) (B (I# Int#
dst_off#)) (B (I# Int#
len#))
  = forall s a. STRep s a -> ST s a
ST forall a b. (a -> b) -> a -> b
$ \State# s
s -> case forall d.
ByteArray#
-> Int#
-> MutableByteArray# d
-> Int#
-> Int#
-> State# d
-> State# d
GHC.Exts.copyByteArray# ByteArray#
src# Int#
src_off# MutableByteArray# s
dst# Int#
dst_off# Int#
len# State# s
s of
                 State# s
s' -> (# State# s
s', () #)

{-# INLINE newByteArray #-}
newByteArray :: B -> ST s (MBA s)
newByteArray :: forall s. B -> ST s (MBA s)
newByteArray (B (I# Int#
n#))
  = forall s a. STRep s a -> ST s a
ST forall a b. (a -> b) -> a -> b
$ \State# s
s -> case forall d. Int# -> State# d -> (# State# d, MutableByteArray# d #)
GHC.Exts.newByteArray# Int#
n# State# s
s of
                 (# State# s
s', MutableByteArray# s
mba# #) -> (# State# s
s', forall s. MutableByteArray# s -> MBA s
MBA# MutableByteArray# s
mba# #)

{-# INLINE writeWord8Array #-}
writeWord8Array :: MBA s -> B -> Word -> ST s ()
writeWord8Array :: forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array (MBA# MutableByteArray# s
mba#) (B (I# Int#
i#)) (W# Word#
w#)
  = forall s a. STRep s a -> ST s a
ST forall a b. (a -> b) -> a -> b
$ \State# s
s ->
#if __GLASGOW_HASKELL__ >= 902
      case forall d.
MutableByteArray# d -> Int# -> Word8# -> State# d -> State# d
GHC.Exts.writeWord8Array# MutableByteArray# s
mba# Int#
i# (Word# -> Word8#
GHC.Exts.wordToWord8# Word#
w#) State# s
s of
#else
      case GHC.Exts.writeWord8Array# mba# i# w# s of
#endif
                 State# s
s' -> (# State# s
s', () #)
{- not needed yet
{-# INLINE indexWord8Array #-}
indexWord8Array :: ShortText -> B -> Word
indexWord8Array (ShortText (BSSI.SBS ba#)) (B (I# i#)) = W# (GHC.Exts.indexWord8Array# ba# i#)
-}

{-# INLINE copyAddrToByteArray #-}
copyAddrToByteArray :: Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray :: forall a. Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray (Ptr Addr#
src#) (MBA# MutableByteArray# RealWorld
dst#) (B (I# Int#
dst_off#)) (B (I# Int#
len#))
  = forall s a. STRep s a -> ST s a
ST forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s -> case forall d.
Addr#
-> MutableByteArray# d -> Int# -> Int# -> State# d -> State# d
GHC.Exts.copyAddrToByteArray# Addr#
src# MutableByteArray# RealWorld
dst# Int#
dst_off# Int#
len# State# RealWorld
s of
                 State# RealWorld
s' -> (# State# RealWorld
s', () #)

----------------------------------------------------------------------------
-- unsafeFreezeShrink

#if __GLASGOW_HASKELL__ >= 710
-- for GHC versions which have the 'shrinkMutableByteArray#' primop
{-# INLINE unsafeFreezeShrink #-}
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink :: forall s. MBA s -> B -> ST s ShortText
unsafeFreezeShrink MBA s
mba B
n = do
  forall s. MBA s -> B -> ST s ()
shrink MBA s
mba B
n
  forall s. MBA s -> ST s ShortText
unsafeFreeze MBA s
mba

{-# INLINE shrink #-}
shrink :: MBA s -> B -> ST s ()
shrink :: forall s. MBA s -> B -> ST s ()
shrink (MBA# MutableByteArray# s
mba#) (B (I# Int#
i#))
  = forall s a. STRep s a -> ST s a
ST forall a b. (a -> b) -> a -> b
$ \State# s
s -> case forall d. MutableByteArray# d -> Int# -> State# d -> State# d
GHC.Exts.shrinkMutableByteArray# MutableByteArray# s
mba# Int#
i# State# s
s of
                 State# s
s' -> (# State# s
s', () #)
#else
-- legacy code for GHC versions which lack `shrinkMutableByteArray#` primop
{-# INLINE unsafeFreezeShrink #-}
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink mba0 n = do
  mba' <- newByteArray n
  copyByteArray2 mba0 0 mba' 0 n
  unsafeFreeze mba'

{-# INLINE copyByteArray2 #-}
copyByteArray2 :: MBA s -> B -> MBA s -> B -> B -> ST s ()
copyByteArray2 (MBA# src#) (B (I# src_off#)) (MBA# dst#) (B (I# dst_off#)) (B( I# len#))
  = ST $ \s -> case GHC.Exts.copyMutableByteArray# src# src_off# dst# dst_off# len# s of
                 s' -> (# s', () #)
#endif

----------------------------------------------------------------------------
-- Helpers for encoding code points into UTF-8 code units
--
--   7 bits| <    0x80 | 0xxxxxxx
--  11 bits| <   0x800 | 110yyyyx  10xxxxxx
--  16 bits| < 0x10000 | 1110yyyy  10yxxxxx  10xxxxxx
--  21 bits|           | 11110yyy  10yyxxxx  10xxxxxx  10xxxxxx

-- | Unicode Code-point
--
-- Keeping it as a 'Word' is more convenient for bit-ops and FFI
newtype CP = CP Word

{-# INLINE ch2cp #-}
ch2cp :: Char -> CP
ch2cp :: Char -> CP
ch2cp (Char -> Int
ord -> Int
ci)
  | forall i. (Num i, Bits i) => i -> Bool
isSurr Int
ci  = Word -> CP
CP Word
0xFFFD
  | Bool
otherwise  = Word -> CP
CP (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
ci)

{-# INLINE isSurr #-}
isSurr :: (Num i, Bits i) => i -> Bool
isSurr :: forall i. (Num i, Bits i) => i -> Bool
isSurr i
ci = i
ci forall a. Bits a => a -> a -> a
.&. i
0xfff800 forall a. Eq a => a -> a -> Bool
== i
0xd800

{-# INLINE cp2ch #-}
cp2ch :: CP -> Char
cp2ch :: CP -> Char
cp2ch (CP Word
w) = (Word
w forall a. Ord a => a -> a -> Bool
< Word
0x110000) forall a. (?callStack::CallStack) => Bool -> a -> a
`assert` Int -> Char
unsafeChr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
w)

-- used/needed by index-lookup functions to encode out of bounds
cp2chSafe :: CP -> Maybe Char
cp2chSafe :: CP -> Maybe Char
cp2chSafe CP
cp
  | CP -> Bool
cpNull CP
cp = forall a. Maybe a
Nothing
  | Bool
otherwise = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$! CP -> Char
cp2ch CP
cp
  where
    cpNull :: CP -> Bool
    cpNull :: CP -> Bool
cpNull (CP Word
w) = Word
w forall a. Ord a => a -> a -> Bool
>= Word
0x110000

{-# INLINE cpLen #-}
cpLen :: CP -> B
cpLen :: CP -> B
cpLen (CP Word
cp)
  | Word
cp forall a. Ord a => a -> a -> Bool
<    Word
0x80  = Int -> B
B Int
1
  | Word
cp forall a. Ord a => a -> a -> Bool
<   Word
0x800  = Int -> B
B Int
2
  | Word
cp forall a. Ord a => a -> a -> Bool
< Word
0x10000  = Int -> B
B Int
3
  | Bool
otherwise     = Int -> B
B Int
4

-- convenience wrapper; unsafe like readCodePoint
{-# INLINE decodeCharAtOfs #-}
decodeCharAtOfs :: ShortText -> B -> (Char,B)
decodeCharAtOfs :: ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs = (Char
c,B
ofs')
  where
    c :: Char
c    = CP -> Char
cp2ch CP
cp
    ofs' :: B
ofs' = B
ofs forall a. Num a => a -> a -> a
+ CP -> B
cpLen CP
cp
    cp :: CP
cp   = ShortText -> B -> CP
readCodePoint ShortText
st B
ofs
{- pure version of decodeCharAtOfs, but unfortunately significantly slower

decodeCharAtOfs st ofs
  | b0 < 0x80 = (cp2ch $ CP b0,ofs + B 1)
  | otherwise = case b0 `unsafeShiftR` 4 of
                  0xf -> (cp2ch $ CP go4, ofs + B 4)
                  0xe -> (cp2ch $ CP go3, ofs + B 3)
                  _   -> (cp2ch $ CP go2, ofs + B 2)
  where
    b0    = buf 0
    buf j = indexWord8Array st (ofs+j)

    go2 =     ((b0    .&. 0x1f) `unsafeShiftL` 6)
          .|.  (buf 1 .&. 0x3f)

    go3 =     ((b0    .&. 0x0f) `unsafeShiftL` (6+6))
          .|. ((buf 1 .&. 0x3f) `unsafeShiftL` 6)
          .|.  (buf 2 .&. 0x3f)

    go4 =     ((b0    .&. 0x07) `unsafeShiftL` (6+6+6))
          .|. ((buf 1 .&. 0x3f) `unsafeShiftL` (6+6))
          .|. ((buf 2 .&. 0x3f) `unsafeShiftL` 6)
          .|.  (buf 3 .&. 0x3f)
-}


-- | \(\mathcal{O}(1)\) Construct 'ShortText' from single codepoint.
--
-- prop> singleton c == pack [c]
--
-- prop> length (singleton c) == 1
--
-- >>> singleton 'A'
-- "A"
--
-- >>> map singleton ['\55295','\55296','\57343','\57344'] -- U+D7FF U+D800 U+DFFF U+E000
-- ["\55295","\65533","\65533","\57344"]
--
-- __Note__: This function is total because it replaces the (invalid) code-points U+D800 through U+DFFF with the replacement character U+FFFD.
--
-- @since 0.1.2
singleton :: Char -> ShortText
singleton :: Char -> ShortText
singleton = CP -> ShortText
singleton' forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> CP
ch2cp

singleton' :: CP -> ShortText
singleton' :: CP -> ShortText
singleton' cp :: CP
cp@(CP Word
cpw)
  | Word
cpw forall a. Ord a => a -> a -> Bool
<    Word
0x80  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
1 forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
0 CP
cp
  | Word
cpw forall a. Ord a => a -> a -> Bool
<   Word
0x800  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
2 forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
0 CP
cp
  | Word
cpw forall a. Ord a => a -> a -> Bool
< Word
0x10000  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
3 forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
0 CP
cp
  | Bool
otherwise      = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
4 forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
0 CP
cp

-- | \(\mathcal{O}(n)\) Prepend a character to a 'ShortText'.
--
-- prop> cons c t == singleton c <> t
--
-- @since 0.1.2
cons :: Char -> ShortText -> ShortText
cons :: Char -> ShortText -> ShortText
cons (Char -> CP
ch2cp -> cp :: CP
cp@(CP Word
cpw)) ShortText
sfx
  | B
n forall a. Eq a => a -> a -> Bool
== B
0         = CP -> ShortText
singleton' CP
cp
  | Word
cpw forall a. Ord a => a -> a -> Bool
<    Word
0x80  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
1) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
0 CP
cp forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. B -> MBA s -> ST s ()
copySfx B
1 MBA s
mba
  | Word
cpw forall a. Ord a => a -> a -> Bool
<   Word
0x800  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
2) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
0 CP
cp forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. B -> MBA s -> ST s ()
copySfx B
2 MBA s
mba
  | Word
cpw forall a. Ord a => a -> a -> Bool
< Word
0x10000  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
3) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
0 CP
cp forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. B -> MBA s -> ST s ()
copySfx B
3 MBA s
mba
  | Bool
otherwise      = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
4) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
0 CP
cp forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. B -> MBA s -> ST s ()
copySfx B
4 MBA s
mba
  where
    !n :: B
n = ShortText -> B
toB ShortText
sfx

    copySfx :: B -> MBA s -> ST s ()
    copySfx :: forall s. B -> MBA s -> ST s ()
copySfx B
ofs MBA s
mba = forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
sfx B
0 MBA s
mba B
ofs B
n

-- | \(\mathcal{O}(n)\) Append a character to the ond of a 'ShortText'.
--
-- prop> snoc t c == t <> singleton c
--
-- @since 0.1.2
snoc :: ShortText -> Char -> ShortText
snoc :: ShortText -> Char -> ShortText
snoc ShortText
pfx (Char -> CP
ch2cp -> cp :: CP
cp@(CP Word
cpw))
  | B
n forall a. Eq a => a -> a -> Bool
== B
0         = CP -> ShortText
singleton' CP
cp
  | Word
cpw forall a. Ord a => a -> a -> Bool
<    Word
0x80  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
1) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> ST s ()
copyPfx MBA s
mba forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
n CP
cp
  | Word
cpw forall a. Ord a => a -> a -> Bool
<   Word
0x800  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
2) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> ST s ()
copyPfx MBA s
mba forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
n CP
cp
  | Word
cpw forall a. Ord a => a -> a -> Bool
< Word
0x10000  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
3) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> ST s ()
copyPfx MBA s
mba forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
n CP
cp
  | Bool
otherwise      = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nforall a. Num a => a -> a -> a
+B
4) forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> forall s. MBA s -> ST s ()
copyPfx MBA s
mba forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
n CP
cp
  where
    !n :: B
n = ShortText -> B
toB ShortText
pfx

    copyPfx :: MBA s -> ST s ()
    copyPfx :: forall s. MBA s -> ST s ()
copyPfx MBA s
mba = forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
pfx B
0 MBA s
mba B
0 B
n

{-
writeCodePoint :: MBA s -> Int -> Word -> ST s ()
writeCodePoint mba ofs cp
  | cp <    0x80  = writeCodePoint1 mba ofs cp
  | cp <   0x800  = writeCodePoint2 mba ofs cp
  | cp < 0x10000  = writeCodePoint3 mba ofs cp
  | otherwise     = writeCodePoint4 mba ofs cp
-}

writeCodePointN :: B -> MBA s -> B -> CP -> ST s ()
writeCodePointN :: forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
1 = forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1
writeCodePointN B
2 = forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2
writeCodePointN B
3 = forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3
writeCodePointN B
4 = forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4
writeCodePointN B
_ = forall a. (?callStack::CallStack) => a
undefined

writeCodePoint1 :: MBA s -> B -> CP -> ST s ()
writeCodePoint1 :: forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
ofs (CP Word
cp) =
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba B
ofs Word
cp

writeCodePoint2 :: MBA s -> B -> CP -> ST s ()
writeCodePoint2 :: forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
ofs (CP Word
cp) = do
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba  B
ofs    (Word
0xc0 forall a. Bits a => a -> a -> a
.|. (Word
cp forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
6))
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
1) (Word
0x80 forall a. Bits a => a -> a -> a
.|. (Word
cp                     forall a. Bits a => a -> a -> a
.&. Word
0x3f))

writeCodePoint3 :: MBA s -> B -> CP -> ST s ()
writeCodePoint3 :: forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
ofs (CP Word
cp) = do
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba  B
ofs    (Word
0xe0 forall a. Bits a => a -> a -> a
.|.  (Word
cp forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
12))
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
1) (Word
0x80 forall a. Bits a => a -> a -> a
.|. ((Word
cp forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
6)  forall a. Bits a => a -> a -> a
.&. Word
0x3f))
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
2) (Word
0x80 forall a. Bits a => a -> a -> a
.|.  (Word
cp                    forall a. Bits a => a -> a -> a
.&. Word
0x3f))

writeCodePoint4 :: MBA s -> B -> CP -> ST s ()
writeCodePoint4 :: forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
ofs (CP Word
cp) = do
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba  B
ofs    (Word
0xf0 forall a. Bits a => a -> a -> a
.|.  (Word
cp forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
18))
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
1) (Word
0x80 forall a. Bits a => a -> a -> a
.|. ((Word
cp forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
12) forall a. Bits a => a -> a -> a
.&. Word
0x3f))
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
2) (Word
0x80 forall a. Bits a => a -> a -> a
.|. ((Word
cp forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
6)  forall a. Bits a => a -> a -> a
.&. Word
0x3f))
  forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsforall a. Num a => a -> a -> a
+B
3) (Word
0x80 forall a. Bits a => a -> a -> a
.|. (Word
cp                     forall a. Bits a => a -> a -> a
.&. Word
0x3f))

-- beware: UNSAFE!
readCodePoint :: ShortText -> B -> CP
readCodePoint :: ShortText -> B -> CP
readCodePoint ShortText
st (B -> CSize
csizeFromB -> CSize
ofs)
  = Word -> CP
CP forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO Word
c_text_short_ofs_cp (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
ofs)

foreign import ccall unsafe "hs_text_short_ofs_cp" c_text_short_ofs_cp :: ByteArray# -> CSize -> IO CCodePoint

readCodePointRev :: ShortText -> B -> CP
readCodePointRev :: ShortText -> B -> CP
readCodePointRev ShortText
st (B -> CSize
csizeFromB -> CSize
ofs)
  = Word -> CP
CP forall a b. (a -> b) -> a -> b
$ forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO Word
c_text_short_ofs_cp_rev (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
ofs)

foreign import ccall unsafe "hs_text_short_ofs_cp_rev" c_text_short_ofs_cp_rev :: ByteArray# -> CSize -> IO CCodePoint

----------------------------------------------------------------------------
-- string & list literals

-- | __Note__: Surrogate pairs (@[U+D800 .. U+DFFF]@) character literals are replaced by U+FFFD.
--
-- @since 0.1.2
instance GHC.Exts.IsList ShortText where
    type (Item ShortText) = Char
    fromList :: [Item ShortText] -> ShortText
fromList = String -> ShortText
fromString
    toList :: ShortText -> [Item ShortText]
toList   = ShortText -> String
toString

-- | __Note__: Surrogate pairs (@[U+D800 .. U+DFFF]@) in string literals are replaced by U+FFFD.
--
-- This matches the behaviour of 'S.IsString' instance for 'T.Text'.
instance S.IsString ShortText where
    fromString :: String -> ShortText
fromString = String -> ShortText
fromStringLit

-- i.e., don't inline before Phase 0
{-# INLINE [0] fromStringLit #-}
fromStringLit :: String -> ShortText
fromStringLit :: String -> ShortText
fromStringLit = String -> ShortText
fromString

{-# RULES "ShortText empty literal" fromStringLit "" = mempty #-}

-- TODO: this doesn't seem to fire
{-# RULES "ShortText singleton literal" forall c . fromStringLit [c] = singleton c #-}

{-# RULES "ShortText literal ASCII" forall s . fromStringLit (GHC.unpackCString# s) = fromLitAsciiAddr# s #-}

{-# RULES "ShortText literal UTF-8" forall s . fromStringLit (GHC.unpackCStringUtf8# s) = fromLitMUtf8Addr# s #-}

{-# NOINLINE fromLitAsciiAddr# #-}
fromLitAsciiAddr# :: Addr# -> ShortText
fromLitAsciiAddr# :: Addr# -> ShortText
fromLitAsciiAddr# (forall a. Addr# -> Ptr a
Ptr -> Ptr CChar
ptr) = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$ do
  B
sz <- CSize -> B
csizeToB forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Ptr CChar -> IO CSize
c_strlen Ptr CChar
ptr

  case B
sz forall a. Ord a => a -> a -> Ordering
`compare` B
0 of
    Ordering
EQ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Monoid a => a
mempty -- should not happen if rules fire correctly
    Ordering
GT -> forall a. ST RealWorld a -> IO a
stToIO forall a b. (a -> b) -> a -> b
$ do
      MBA RealWorld
mba <- forall s. B -> ST s (MBA s)
newByteArray B
sz
      forall a. Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray Ptr CChar
ptr MBA RealWorld
mba B
0 B
sz
      forall s. MBA s -> ST s ShortText
unsafeFreeze MBA RealWorld
mba
    Ordering
LT -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. (?callStack::CallStack) => String -> a
error String
"fromLitAsciiAddr#")
          -- NOTE: should never happen unless strlen(3) overflows (NB: CSize
          -- is unsigned; the overflow would occur when converting to
          -- 'B')

foreign import ccall unsafe "strlen" c_strlen :: CString -> IO CSize

-- GHC uses an encoding resembling Modified UTF-8 for non-ASCII string-literals
{-# NOINLINE fromLitMUtf8Addr# #-}
fromLitMUtf8Addr# :: Addr# -> ShortText
fromLitMUtf8Addr# :: Addr# -> ShortText
fromLitMUtf8Addr# (forall a. Addr# -> Ptr a
Ptr -> Ptr CChar
ptr) = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$ do
  B
sz <- Int -> B
B forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Ptr CChar -> IO Int
c_text_short_mutf8_strlen Ptr CChar
ptr

  case B
sz forall a. Ord a => a -> a -> Ordering
`compare` B
0 of
    Ordering
EQ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Monoid a => a
mempty -- should not happen if rules fire correctly
    Ordering
GT -> forall a. ST RealWorld a -> IO a
stToIO forall a b. (a -> b) -> a -> b
$ do
      MBA RealWorld
mba <- forall s. B -> ST s (MBA s)
newByteArray B
sz
      forall a. Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray Ptr CChar
ptr MBA RealWorld
mba B
0 B
sz
      forall s. MBA s -> ST s ShortText
unsafeFreeze MBA RealWorld
mba
    Ordering
LT -> do
      MBA RealWorld
mba <- forall a. ST RealWorld a -> IO a
stToIO (forall s. B -> ST s (MBA s)
newByteArray (forall a. Num a => a -> a
abs B
sz))
      Ptr CChar -> MutableByteArray# RealWorld -> IO ()
c_text_short_mutf8_trans Ptr CChar
ptr (forall s. MBA s -> MutableByteArray# s
unMBA# MBA RealWorld
mba)
      forall a. ST RealWorld a -> IO a
stToIO (forall s. MBA s -> ST s ShortText
unsafeFreeze MBA RealWorld
mba)

foreign import ccall unsafe "hs_text_short_mutf8_strlen" c_text_short_mutf8_strlen :: CString -> IO Int

foreign import ccall unsafe "hs_text_short_mutf8_trans" c_text_short_mutf8_trans :: CString -> MutableByteArray# RealWorld -> IO ()

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Text.Short (pack, unpack, concat)
-- >>> import Text.Show.Functions ()
-- >>> import qualified Test.QuickCheck.Arbitrary as QC
-- >>> import Test.QuickCheck.Instances ()
-- >>> instance QC.Arbitrary ShortText where { arbitrary = fmap fromString QC.arbitrary }