Here is a short piece of F# code that returns a tuple holding unique lists of Twitter-style user IDs, hash tags, and URLs found in a block of text. It also takes advantage of F#'s async workflow constructs and active pattern matching.
/// Partial active pattern that runs the regular expression `pat` over `inp`.
/// Yields `Some` with the full text of every match, in the order the regex
/// engine reports them, or `None` when the pattern does not match at all.
let (|Matches|_|) (pat:string) (inp:string) =
    let found = Regex.Matches(inp, pat)
    match found.Count with
    | 0 -> None
    | count ->
        // Use the whole match text (Value), not an individual capture group.
        Some (List.init count (fun i -> found.[i].Value))
/// Returns every URL found in `txt`, in order of appearance, or an empty
/// list when there is none. Both `http://` and `https://` links are
/// recognised; a URL runs until the next whitespace character.
let getUrls txt =
    // Verbatim string so the backslash in `\S` reaches the regex engine
    // without going through F# string-escape handling.
    let linkPat = @"https?://\S+"
    match txt with
    | Matches linkPat urls -> urls
    | _ -> []
/// Returns every hash tag found in `txt`, or an empty list when there is
/// none. Each entry is the complete match text, so it still carries its
/// leading `#` character(s).
let getTags txt =
    // One or more '#' followed by letters, digits, '-' or '_'.
    let hashTagPattern = "[#]+([A-Za-z0-9-_]+)"
    defaultArg ((|Matches|_|) hashTagPattern txt) []
/// Returns every user mention found in `txt`, or an empty list when there
/// is none. Each entry is the complete match text, so it still carries its
/// leading `@` character(s).
let getUsers txt =
    // One or more '@' followed by letters, digits, '-' or '_'.
    let mentionPattern = "[@]+([A-Za-z0-9-_]+)"
    defaultArg ((|Matches|_|) mentionPattern txt) []
/// Extracts URLs, hash tags and user mentions from `txt`, running the three
/// extractions in parallel, and returns them as `(urls, tags, users)`.
/// Each list contains every distinct token once, in order of first
/// appearance.
let parseTxtForTokens txt =
    // Wrap one extractor in an async computation. Duplicates are removed
    // here so the result really is a unique list; Seq.distinct keeps the
    // first occurrence of each token.
    let extract (finder: string -> string list) =
        async { return finder txt |> Seq.distinct |> List.ofSeq }
    let results =
        [ extract getUrls; extract getTags; extract getUsers ]
        |> Async.Parallel
        |> Async.RunSynchronously
    // Async.Parallel yields results in input order, so the array can be
    // destructured directly instead of converting to a list and indexing.
    match results with
    | [| urls; tags; users |] -> (urls, tags, users)
    | _ -> failwith "parseTxtForTokens: expected exactly three results"
Advertisement