Visual Basic for Applications/Pseudo Random Repeated Substrings

< Visual Basic for Applications

The VBA Rnd() Function

Worst Case for Rnd() Substrings?

Option Explicit

Sub TestRndForRepeats()
    'run this to make a pseudo random string
    'and to test it for longest repeated substring
    
    Dim strRnd As String, sOut As String
    Dim nOut As Long, nLen As Long
    
    strRnd = MakeLongRndStr(1000)
    MsgBox strRnd,, "Long string..."
    
    sOut = LongestRepeatSubstring(strRnd, nOut)
    
    MsgBox "Repeated substring found is : " & _
       vbCrLf & sOut & vbCrLf & _
       "Number of these found : " & nOut & vbCrLf & _
       "Length of each : " & Len(sOut),, "Repeat substring..."

End Sub

Sub TestHashForRepeats()
    'run this to make a long hash-based output
    'and to test it for longest repeated substring
    
    Dim sOut As String, sHash As String, nOut As Long
    
    sHash = LongHash("String to hash", 1000)
    
    MsgBox "The following sha256-based hash has " & _
           Len(sHash) & " characters." & _
           vbCrLf & vbCrLf & sHash,, "Long hash..."

    sOut = LongestRepeatSubstring(sHash, nOut)
    
    MsgBox "Repeated substring found is : " & _
       vbCrLf & sOut & vbCrLf & _
       "Number of these found : " & nOut & vbCrLf & _
       "Length of each : " & Len(sOut),, "Repeat substring..."

End Sub

Function MakeLongRndStr(nNumChr As Long) As String
    'Makes a long capital letters string using rnd VBA function
    
    Dim n As Long, sChr As String, nAsc As Long
    Dim nSamp As Long, sRec As String
    
    '========================================================================
    ' Notes and Conclusions:
    ' The VBA function rnd is UNSUITED to generation of long random strings.
    ' Both length and number of repeats increases rapidly near 256 charas.
    ' Reasonable results can be obtained by keeping below 128 characters.
    ' For longer strings, consider hash-based methods of generation.
    '========================================================================
    'Randomize 'right place
    Do Until n >= nNumChr
        'DoEvents
        Randomize 'wrong place
        nSamp = Int((122 - 48 + 1) * Rnd + 48) 'range includes all charas
        sChr = Chr(nSamp)
        
        'cherry-picks 10, 26, 36, 52, or 62 from a set of 75
        Select Case nSamp 'chara filter
                Case 65 To 90  'upper case letters
                    sRec = sRec & sChr
                Case 48 To 57  'integers
                    'sRec = sRec & sChr
                Case 97 To 122 'lower case letters
                    'sRec = sRec & sChr
                Case Else
                    'disregard
        End Select
        n = Len(sRec)
    Loop
    
    'MsgBox sAccum
    
    MakeLongRndStr = Left$(sRec, nNumChr)

End Function

Function LongHash(sIn As String, nReq As Long, Optional sSeed As String = "") As String
    'makes a long sha256 hash - length specified by user
    'Parameters: sIn;   the string to hash
                'nReq;  the length of output needed
                'sSeed; optional added string modifier
    
    Dim n As Long, m As Long, c As Long, nAsc As Integer, sChr As String
    Dim sF As String, sHash As String, sRec As String, sAccum As String
    
    Do Until m >= nReq
        DoEvents
        n = n + 1 'increment
        'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
        'you set your own cycle increment here
        sF = sIn & sSeed & sAccum & (7 * n * m / 3)
        'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
        'get a single hash of sF
        sHash = HashSHA256(sF)
        'filter output for chara type
        For c = 1 To Len(sHash)
            sChr = Mid$(sHash, c, 1)
            nAsc = Asc(sChr)
            'cherry-picks 10, 26, 36 ,52, or 62 from a set of 64
            Select Case nAsc 'chara filter
                Case 65 To 90  'upper case letters
                    sRec = sRec & sChr
                Case 48 To 57  'integers
                    'sRec = sRec & sChr
                Case 97 To 122 'lower case letters
                    'sRec = sRec & sChr
                Case Else
                    'disregard
            End Select
        Next c
        'accumulate
        sAccum = sAccum & sRec
        m = Len(sAccum)
        sRec = "" 'delete line at your peril!
    Loop
    
    LongHash = Left$(sAccum, nReq)

End Function

Function HashSHA256(sIn As String) As String
    'Set a reference to mscorlib 4.0 64-bit
    'HASHES sIn string using SHA2-256 algorithm
    
    'NOTE
    'total 88 output text charas of base 64
    'Standard empty string input gives : 47DEQpj8HBSa+/...etc,
    
    Dim oT As Object, oSHA256 As Object
    Dim TextToHash() As Byte, bytes() As Byte
    
    Set oT = CreateObject("System.Text.UTF8Encoding")
    Set oSHA256 = CreateObject("System.Security.Cryptography.SHA256Managed")
    
    TextToHash = oT.GetBytes_4(sIn)
    bytes = oSHA256.ComputeHash_2((TextToHash))
    
    HashSHA256 = ConvB64(bytes)
    
    Set oT = Nothing
    Set oSHA256 = Nothing
   
End Function

Function ConvB64(vIn As Variant) As Variant
    'used to produce a base-64 output
    'Set a reference to mscorlib 4.0 64-bit
    
    Dim oD As Object
    
    Set oD = CreateObject("MSXML2.DOMDocument")
    With oD
        .LoadXML "<root />"
        .DocumentElement.DataType = "bin.base64"
        .DocumentElement.nodeTypedValue = vIn
    End With
    ConvB64 = Replace(oD.DocumentElement.Text, vbLf, "")
    
    Set oD = Nothing
    
End Function

Function LongestRepeatSubstring(sIn As String, Optional nSS As Long) As String
    'finds longest repeated non-overlapping substring and number of repeats
    'greatest number cycles = (0.5*n)^2 for when "none found", eg; abcdef (9)
    'shortest number cycles = 1 for one simple duplicated string; eg abcabc
    
    Dim s1 As String, s2 As String, X As Long
    Dim sPrev As String, nPrev As Long, nLPrev As Long
    Dim nL As Long, nTrial As Long, nPos As Long, vAr As Variant
        
    nL = Len(sIn)
    For nTrial = Int(nL / 2) To 1 Step -1
        DoEvents
        For nPos = 1 To (nL - (2 * nTrial) + 1)
            X = 0
            s1 = Mid(sIn, nPos, nTrial)
            s2 = Right(sIn, (nL - nPos - nTrial + 1))
            vAr = Split(s2, s1)
            X = UBound(vAr) - LBound(vAr)
            If X > 0 Then
                If nPrev < X Then
                    sPrev = s1
                    nPrev = X
                End If
            End If
        Next nPos
        If nPrev <> 0 Then
            LongestRepeatSubstring = sPrev
            nSS = nPrev
            Exit Function
        End If
    Next nTrial
End Function
This article is issued from Wikibooks. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.