How can I improve my Delphi code for loading and searching a dictionary?

I am a Delphi programmer. I wrote a program that uses dictionaries of words and expressions (loaded into the program as a "string array"). It uses a search algorithm based on each string's "checksum" (I hope this is the right word; it is a hash value). Each string is converted to an integer as follows:

var
   FHashSize: Integer; //stores the value of GetHashSize
   // Two 256-entry lookup tables indexed by a byte value. InitHashTable fills
   // HashTable; HashTableNoCase starts as a copy of it, with the entries for
   // 'A'..'Z' overwritten by the entries for 'a'..'z'.
   HashTable, HashTableNoCase: array[Byte] of Longword;
   // Set to True as the last step of InitHashTable; HashStrBuf checks it to
   // decide whether the tables still have to be built.
   HashTableInit: Boolean = False;

const
   // Per-character lower-casing table, indexed by AnsiChar.
   // Every entry maps to itself except 'A'..'Z' ($41..$5A), which map to
   // 'a'..'z' ($61..$7A). Characters in $80..$FF are left unchanged, so only
   // the ASCII letters are folded.
   AnsiLowCaseLookup: array[AnsiChar] of AnsiChar = (
      #$00, #$01, #$02, #$03, #$04, #$05, #$06, #$07,
      #$08, #$09, #$0A, #$0B, #$0C, #$0D, #$0E, #$0F,
      #$10, #$11, #$12, #$13, #$14, #$15, #$16, #$17,
      #$18, #$19, #$1A, #$1B, #$1C, #$1D, #$1E, #$1F,
      #$20, #$21, #$22, #$23, #$24, #$25, #$26, #$27,
      #$28, #$29, #$2A, #$2B, #$2C, #$2D, #$2E, #$2F,
      #$30, #$31, #$32, #$33, #$34, #$35, #$36, #$37,
      #$38, #$39, #$3A, #$3B, #$3C, #$3D, #$3E, #$3F,
      #$40, #$61, #$62, #$63, #$64, #$65, #$66, #$67,
      #$68, #$69, #$6A, #$6B, #$6C, #$6D, #$6E, #$6F,
      #$70, #$71, #$72, #$73, #$74, #$75, #$76, #$77,
      #$78, #$79, #$7A, #$5B, #$5C, #$5D, #$5E, #$5F,
      #$60, #$61, #$62, #$63, #$64, #$65, #$66, #$67,
      #$68, #$69, #$6A, #$6B, #$6C, #$6D, #$6E, #$6F,
      #$70, #$71, #$72, #$73, #$74, #$75, #$76, #$77,
      #$78, #$79, #$7A, #$7B, #$7C, #$7D, #$7E, #$7F,
      #$80, #$81, #$82, #$83, #$84, #$85, #$86, #$87,
      #$88, #$89, #$8A, #$8B, #$8C, #$8D, #$8E, #$8F,
      #$90, #$91, #$92, #$93, #$94, #$95, #$96, #$97,
      #$98, #$99, #$9A, #$9B, #$9C, #$9D, #$9E, #$9F,
      #$A0, #$A1, #$A2, #$A3, #$A4, #$A5, #$A6, #$A7,
      #$A8, #$A9, #$AA, #$AB, #$AC, #$AD, #$AE, #$AF,
      #$B0, #$B1, #$B2, #$B3, #$B4, #$B5, #$B6, #$B7,
      #$B8, #$B9, #$BA, #$BB, #$BC, #$BD, #$BE, #$BF,
      #$C0, #$C1, #$C2, #$C3, #$C4, #$C5, #$C6, #$C7,
      #$C8, #$C9, #$CA, #$CB, #$CC, #$CD, #$CE, #$CF,
      #$D0, #$D1, #$D2, #$D3, #$D4, #$D5, #$D6, #$D7,
      #$D8, #$D9, #$DA, #$DB, #$DC, #$DD, #$DE, #$DF,
      #$E0, #$E1, #$E2, #$E3, #$E4, #$E5, #$E6, #$E7,
      #$E8, #$E9, #$EA, #$EB, #$EC, #$ED, #$EE, #$EF,
      #$F0, #$F1, #$F2, #$F3, #$F4, #$F5, #$F6, #$F7,
      #$F8, #$F9, #$FA, #$FB, #$FC, #$FD, #$FE, #$FF);

implementation

{ Chooses the number of hash slots for a dictionary holding Count entries.

  Count  - number of entries that will be stored.
  Result - 256 when Count < 65; otherwise the smallest power of 16 that is
           greater than or equal to Count div 4, clamped to 16^7 so the value
           always fits in a 32-bit Integer. The result is always a power of
           two, so callers may use (Result - 1) as a bit mask.

  The size is computed with integer arithmetic only. The earlier
  Ceil(Log10(x) / Log10(16)) formulation depends on floating-point rounding:
  at exact powers of 16 the quotient can come out marginally above the
  integer, which makes Ceil step up and yields a table 16 times too large. }
function GetHashSize(const Count: Integer): Integer;
const
   MaxHashSize = $10000000; // 16^7, the largest power of 16 below MaxInt
var
   Target: Integer;
begin
   if Count < 65 then
   begin
      Result := 256;
      Exit;
   end;

   Target := Count div 4;
   // NOTE(review): for Count in 65..67 Target is 16, so the result is 16 --
   // smaller than the 256 returned for Count < 65. This matches the original
   // formula; confirm whether a floor of 256 was intended.
   Result := 16;
   while (Result < Target) and (Result < MaxHashSize) do
      Result := Result * 16;
end;

{ Folds BufSize bytes starting at Buf into a running 32-bit hash value.

  Hash    - the running value to continue from (the seed for a first call).
  Buf     - first byte of the data; read sequentially, never written.
  BufSize - number of bytes to consume; zero or negative leaves the running
            value unchanged.
  Result  - the updated running value.

  Each step looks up HashTable with the low byte of the running value xor-ed
  with the next data byte, and xors that entry with the running value shifted
  right by 8 bits. HashTable must already have been filled by InitHashTable;
  this routine does not check. }
function Hash(const Hash: LongWord; const Buf; const BufSize: Integer): LongWord;
var
   Cursor: PByte;
   Remaining: Integer;
begin
   Result := Hash;   // inside the body, Hash refers to the parameter
   Cursor := @Buf;
   Remaining := BufSize;
   while Remaining > 0 do
   begin
      Result := (Result shr 8) xor HashTable[Byte(Result) xor Cursor^];
      Inc(Cursor);
      Dec(Remaining);
   end;
end;

{ Hashes a buffer of single-byte characters and optionally reduces the hash
  to a slot index.

  StrBuf    - address of the first byte of the string data.
  StrLength - number of bytes available at StrBuf.
  Slots     - when non-zero, the result is reduced with "mod Slots";
              when zero, the raw 32-bit hash is returned.

  Strings of up to 48 bytes are hashed in full. Longer strings are sampled:
  the first 16 bytes, the last 16 bytes, and 16 bytes taken at a fixed stride
  starting at offset 16, so the cost per call is bounded at 48 table lookups.

  The cursor is declared as PAnsiChar, not PChar: all offsets here are byte
  counts, and PChar is PWideChar on Unicode-enabled Delphi versions, where
  Inc(P, n) would advance 2*n bytes and read outside the buffer. On Delphi 7
  PChar and PAnsiChar are the same type, so behaviour there is unchanged. }
function HashStrBuf(const StrBuf: Pointer; const StrLength: Integer; const Slots: LongWord): LongWord;
var P: PAnsiChar;
   I, J: Integer;
begin
   // Build the lookup tables on first use
   if not HashTableInit then
      InitHashTable;
   P := StrBuf;
   if StrLength <= 48 then // Hash all characters for short strings
      Result := Hash($FFFFFFFF, P^, StrLength)
   else
   begin
      // Hash first 16 bytes
      Result := Hash($FFFFFFFF, P^, 16);
      // Hash last 16 bytes
      Inc(P, StrLength - 16);
      Result := Hash(Result, P^, 16);
      // Hash 16 bytes sampled from rest of string.
      // The stride is I + 1, so the last sample sits at offset
      // 16 + 15 * (I + 1), which is always below StrLength - 16.
      I := (StrLength - 48) div 16;
      P := StrBuf;
      Inc(P, 16);
      for J := 1 to 16 do
      begin
         Result := HashTable[Byte(Result) xor Byte(P^)] xor (Result shr 8);
         Inc(P, I + 1);
      end;
   end;
   // Mod into slots
   if Slots <> 0 then
      Result := Result mod Slots;
end;

{ Builds the two 256-entry lookup tables used by the hashing routines and
  marks them as ready.

  HashTable[n] is obtained by starting from n and applying eight rounds of
  "shift right one bit; if the bit shifted out was 1, xor with $EDB88320"
  (the table-generation step of the reflected CRC-32 polynomial).

  HashTableNoCase is the same table, except that the entries for 'A'..'Z'
  are replaced by the entries for the matching lower-case letters.

  HashTableInit is set to True only after both tables are complete. }
procedure InitHashTable;
const
   Polynomial = LongWord($EDB88320);
var
   Entry, Round: Integer;
   Value: LongWord;
begin
   for Entry := 0 to 255 do
   begin
      Value := LongWord(Entry);
      for Round := 1 to 8 do
      begin
         if (Value and 1) = 1 then
            Value := (Value shr 1) xor Polynomial
         else
            Value := Value shr 1;
      end;
      // Fill both tables in one pass; the case-insensitive one is patched below
      HashTable[Entry] := Value;
      HashTableNoCase[Entry] := Value;
   end;

   // "or 32" turns an upper-case ASCII letter code into its lower-case code
   for Entry := Ord('A') to Ord('Z') do
      HashTableNoCase[Entry] := HashTableNoCase[Entry or 32];

   HashTableInit := True;
end;

HashStrBuf " (FHashSize - 1)" " Integer" ( FHashSize) " ", , " ", "" , , " ".

. " " 2/3 " " . - . : ['a'.. 'z', # 224.. # 246, # 248.. # 254, # 154, # 156.. # 159, # 179, # 186, # 191, # 190, # 185, '0'.. '9', '' ''] "", " " ? , - " Integer" (FHashSize), , Ram.

: / ( " " ). " " . ... ? , "", , " " HDD ...

...

PS: :

{ Looks up Key in the dictionary.

  Key    - the string to find; compared byte for byte (case-sensitive).
  Result - the index of the matching entry as stored in FHash, or -1 when
           no entry matches.

  The key is hashed with HashStrBuf and masked with (FHashSize - 1) to pick
  a bucket; every index in that bucket is then checked, first by length and
  then by content.

  Implementation notes:
  - The byte comparison uses a while loop with an explicitly maintained
    index. The value of a for-loop control variable is undefined after the
    loop finishes normally (and is never assigned when the loop body does
    not run, e.g. for an empty key), so it must not be tested afterwards.
  - PAnsiChar(Key) is used instead of @Key[1]: it is valid for an empty
    string, whereas Key[1] raises ERangeError when range checking is on.
  - Pointers are PAnsiChar so that indexing moves one byte at a time on
    every Delphi version, matching the AnsiString key. }
function TDictionary.LocateKey(const Key: AnsiString): Integer;
var i, j, l, H, Candidate: Integer;
   P, Q: PAnsiChar;
begin
   Result := -1;
   l := Length(Key);
   P := PAnsiChar(Key);
   H := HashStrBuf(P, l, 0) and (FHashSize - 1);
   for i := 0 to High(FHash[H]) do  // FHash is an array of buckets, each an array of entry indexes
   begin
      Candidate := FHash[H][i];
      // FKeys.ItemSize holds the byte length of each dictionary string
      if l <> FKeys.ItemSize[Candidate] then
         Continue;
      Q := PAnsiChar(FKeys.Pointer(Candidate)); // pointer to the string in the dictionary
      j := 0;
      while (j < l) and (P[j] = Q[j]) do
         Inc(j);
      if j = l then // all l bytes matched
      begin
         Result := Candidate;
         Exit;
      end;
   end;
end;
+3
3

!

IMHO , .

IniFiles thashedStringList. , .

Delphi , SuperObject ...

SynBigTable, , . TDynArray TList- , . , , , , .

:

, - RawByteString AnsiString, PPtrInt PPointer PtrInt Integer Delphi 7:

// Computes a 32-bit hash of Text.
// The string data is read as 32-bit words and accumulated into two running
// sums: s1 (sum of the words) and s2 (sum of the successive s1 values).
// The result is s1 xor (s2 shl 16); a nil string pointer hashes to 0.
function Hash32(const Text: RawByteString): cardinal;
// Does the actual work on the raw string pointer P (may be nil).
function SubHash(P: PCardinalArray): cardinal;
{$ifdef HASINLINE}inline;{$endif}
var s1,s2: cardinal;
    i, L: PtrInt;
// Mask[n] keeps the low n bytes of a DWORD; used for the 0..3 trailing bytes
const Mask: array[0..3] of cardinal = (0,$ff,$ffff,$ffffff);
begin
  if P<>nil then begin
    // NOTE(review): assumes the string length is stored in the 4 bytes just
    // before the character data. The read goes through PPtrInt, i.e. it is
    // pointer-sized -- confirm this is only built for 32-bit targets.
    L := PPtrInt(PtrInt(P)-4)^; // fast length(Text)
    s1 := 0;
    s2 := 0;
    for i := 1 to L shr 4 do begin // 16 bytes (4 DWORD) by loop - aligned read
      inc(s1,P^[0]);
      inc(s2,s1);
      inc(s1,P^[1]);
      inc(s2,s1);
      inc(s1,P^[2]);
      inc(s2,s1);
      inc(s1,P^[3]);
      inc(s2,s1);
      inc(PtrUInt(P),16); // advance to the next 16-byte group
    end;
    for i := 1 to (L shr 2)and 3 do begin // 4 bytes (DWORD) by loop
      inc(s1,P^[0]);
      inc(s2,s1);
      inc(PtrUInt(P),4);
    end;
    // A full DWORD is read here even when fewer than 4 bytes remain; the mask
    // discards the bytes that lie beyond the string length.
    inc(s1,P^[0] and Mask[L and 3]);      // remaining 0..3 bytes
    inc(s2,s1);
    result := s1 xor (s2 shl 16);
  end else
    result := 0;
end;
begin // use a sub function for better code generation under Delphi
  result := SubHash(pointer(Text));
end;

SynCommons.pas asm, . - ( , crc32/adler32/IniFiles.hash...). adler32, DWORD . , SSE asm, - Delphi.

"" / " " , IniFiles. .

, , .

+3

Delphi 7, Julian Bucknall Delphi , EzDsl (Easy Data Structures ).

, .

ezdsl, , Delphi 7, , . >

, EHash -, inable, , .

, Unicode Delphi; unicode , , . OP Delphi 7, Unicode .

+2

I think you will find that a database (without checksums) is much faster. Perhaps try SQLite, which gives you a single-file database. There are many Delphi libraries for it.

+1
source

All Articles