{
  "_id": "6a1f24beb401979e73420e3e",
  "Package": "textTools",
  "Type": "Package",
  "Title": "Functions for Text Cleansing and Text Analysis",
  "Version": "0.1.0",
  "Author": "Timothy Conwell",
  "Maintainer": "Timothy Conwell <timconwell@gmail.com>",
  "Description": "A framework for text cleansing and analysis. Conveniently\nprepare and process large amounts of text for analysis.\nIncludes various metrics for word counts/frequencies that scale\nefficiently. Quickly analyze large amounts of text data using a\ntext.table (a data.table created with one word (or unit of text\nanalysis) per row, similar to the tidytext format). Offers\nflexibility to efficiently work with text data stored in\nvectors as well as text data formatted as a text.table.",
  "License": "GPL (>= 2)",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.1.1",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-16 06:21:02 UTC",
    "User": "root"
  },
  "Repository": "https://tconwell.r-universe.dev",
  "Date/Publication": "2021-02-05 08:00:05 UTC",
  "RemoteUrl": "https://github.com/cran/textTools",
  "RemoteRef": "HEAD",
  "RemoteSha": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
  "MD5sum": "16ee22770fba2107d1a0009baf7f3b09",
  "_user": "tconwell",
  "_type": "src",
  "_file": "textTools_0.1.0.tar.gz",
  "_fileid": "8cb747488e6b756daa49eed11f6d5735edf27fde50aed49d046be92c1b136c06",
  "_filesize": 1382994,
  "_sha256": "8cb747488e6b756daa49eed11f6d5735edf27fde50aed49d046be92c1b136c06",
  "_created": "2026-05-16T06:21:02.000Z",
  "_published": "2026-06-02T18:45:18.203Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79146558290,
      "time": 113,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7031159274"
    },
    {
      "job": 79146558503,
      "time": 105,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7031158582"
    },
    {
      "job": 79146558521,
      "time": 172,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7031226156"
    },
    {
      "job": 79146558178,
      "time": 275,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7031216434"
    },
    {
      "job": 79146557618,
      "time": 152,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7031147594"
    },
    {
      "job": 79146557724,
      "time": 102,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7366827369"
    },
    {
      "job": 79146558526,
      "time": 75,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7031155729"
    },
    {
      "job": 79146558850,
      "time": 90,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7031157332"
    },
    {
      "job": 79146558498,
      "time": 94,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7031157534"
    }
  ],
  "_buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/cran/textTools",
  "_commit": {
    "id": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
    "author": "Timothy Conwell <timconwell@gmail.com>",
    "committer": "cran-robot <csardi.gabor+cran@gmail.com>",
    "message": "version 0.1.0\n",
    "time": 1612512005
  },
  "_maintainer": {
    "name": "Timothy Conwell",
    "email": "timconwell@gmail.com",
    "login": "tconwell",
    "uuid": 51293949
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "data.table",
      "role": "Depends"
    }
  ],
  "_owner": "cran",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [],
  "_tags": [],
  "_stars": 0,
  "_contributors": [
    {
      "user": "tconwell",
      "count": 1,
      "uuid": 51293949
    }
  ],
  "_userbio": {
    "uuid": 51293949,
    "type": "user",
    "name": "Tim Conwell"
  },
  "_downloads": {
    "count": 210,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/textTools"
  },
  "_searchresults": 4,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/textTools.html",
    "manual.pdf"
  ],
  "_realowner": "tconwell",
  "_cranurl": false,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2021-02-05"
    }
  ],
  "_exports": [
    "as.text.table",
    "flag_words",
    "l_pos",
    "label_parts_of_speech",
    "ngrams",
    "pos",
    "regex_paragraph",
    "regex_sentence",
    "regex_word",
    "rm_frequent_words",
    "rm_infrequent_words",
    "rm_long_words",
    "rm_no_overlap",
    "rm_overlap",
    "rm_parts_of_speech",
    "rm_regexp_match",
    "rm_short_words",
    "rm_words",
    "sampleStr",
    "stopwords",
    "str_any_match",
    "str_count_intersect",
    "str_count_jaccard_similarity",
    "str_count_match",
    "str_count_nomatch",
    "str_count_positional_match",
    "str_count_positional_nomatch",
    "str_count_setdiff",
    "str_counts",
    "str_dt_col_combine",
    "str_extract_match",
    "str_extract_nomatch",
    "str_extract_positional_match",
    "str_extract_positional_nomatch",
    "str_rm_blank_space",
    "str_rm_long_words",
    "str_rm_non_alphanumeric",
    "str_rm_non_printable",
    "str_rm_numbers",
    "str_rm_punctuation",
    "str_rm_regexp_match",
    "str_rm_short_words",
    "str_rm_words",
    "str_rm_words_by_length",
    "str_stopwords_by_part_of_speech",
    "str_tolower",
    "str_weighted_count_match"
  ],
  "_help": [
    {
      "page": "as.text.table",
      "title": "Convert a data.table column of character vectors into a column with one row per word grouped by a grouping column. Optionally will split a column of strings into vectors of constituents.",
      "topics": [
        "as.text.table"
      ]
    },
    {
      "page": "flag_words",
      "title": "Flag rows in a text.table with specific words",
      "topics": [
        "flag_words"
      ]
    },
    {
      "page": "l_pos",
      "title": "Parts of speech for English words from the Moby Project.",
      "topics": [
        "l_pos"
      ]
    },
    {
      "page": "label_parts_of_speech",
      "title": "Add a column with the parts of speech for each word in a text.table",
      "topics": [
        "label_parts_of_speech"
      ]
    },
    {
      "page": "ngrams",
      "title": "Create n-grams",
      "topics": [
        "ngrams"
      ]
    },
    {
      "page": "pos",
      "title": "Parts of speech for English words from the Moby Project.",
      "topics": [
        "pos"
      ]
    },
    {
      "page": "regex_paragraph",
      "title": "Regular expression that might be used to split strings of text into component paragraphs.",
      "topics": [
        "regex_paragraph"
      ]
    },
    {
      "page": "regex_sentence",
      "title": "Regular expression that might be used to split strings of text into component sentences.",
      "topics": [
        "regex_sentence"
      ]
    },
    {
      "page": "regex_word",
      "title": "Regular expression that might be used to split strings of text into component words.",
      "topics": [
        "regex_word"
      ]
    },
    {
      "page": "rm_frequent_words",
      "title": "Delete rows in a text.table where the number of identical records within a group is more than a certain threshold",
      "topics": [
        "rm_frequent_words"
      ]
    },
    {
      "page": "rm_infrequent_words",
      "title": "Delete rows in a text.table where the number of identical records within a group is less than a certain threshold",
      "topics": [
        "rm_infrequent_words"
      ]
    },
    {
      "page": "rm_long_words",
      "title": "Delete rows in a text.table where the word has more than a minimum number of characters",
      "topics": [
        "rm_long_words"
      ]
    },
    {
      "page": "rm_no_overlap",
      "title": "Delete rows in a text.table where the records within a group are not also found in other groups (overlapping records)",
      "topics": [
        "rm_no_overlap"
      ]
    },
    {
      "page": "rm_overlap",
      "title": "Delete rows in a text.table where the records within a group are also found in other groups (overlapping records)",
      "topics": [
        "rm_overlap"
      ]
    },
    {
      "page": "rm_parts_of_speech",
      "title": "Delete rows in a text.table where the word has a certain part of speech",
      "topics": [
        "rm_parts_of_speech"
      ]
    },
    {
      "page": "rm_regexp_match",
      "title": "Delete rows in a text.table where the record has a certain pattern indicated by a regular expression",
      "topics": [
        "rm_regexp_match"
      ]
    },
    {
      "page": "rm_short_words",
      "title": "Delete rows in a text.table where the word has less than a minimum number of characters",
      "topics": [
        "rm_short_words"
      ]
    },
    {
      "page": "rm_words",
      "title": "Remove rows from a text.table with specific words",
      "topics": [
        "rm_words"
      ]
    },
    {
      "page": "sampleStr",
      "title": "Generates (pseudo)random strings of the specified char length",
      "topics": [
        "sampleStr"
      ]
    },
    {
      "page": "stopwords",
      "title": "Vector of lowercase English stop words.",
      "topics": [
        "stopwords"
      ]
    },
    {
      "page": "str_any_match",
      "title": "Detect if there are any words in a vector also found in another vector.",
      "topics": [
        "str_any_match"
      ]
    },
    {
      "page": "str_count_intersect",
      "title": "Count the intersecting words in a vector that are found in another vector (only counts unique words).",
      "topics": [
        "str_count_intersect"
      ]
    },
    {
      "page": "str_count_jaccard_similarity",
      "title": "Calculates the intersect divided by union of two vectors of words.",
      "topics": [
        "str_count_jaccard_similarity"
      ]
    },
    {
      "page": "str_count_match",
      "title": "Count the words in a vector that are found in another vector.",
      "topics": [
        "str_count_match"
      ]
    },
    {
      "page": "str_count_nomatch",
      "title": "Count the words in a vector that are not found in another vector.",
      "topics": [
        "str_count_nomatch"
      ]
    },
    {
      "page": "str_count_positional_match",
      "title": "Count words from a vector that are found in the same position in another vector.",
      "topics": [
        "str_count_positional_match"
      ]
    },
    {
      "page": "str_count_positional_nomatch",
      "title": "Count words from a vector that are not found in the same position in another vector.",
      "topics": [
        "str_count_positional_nomatch"
      ]
    },
    {
      "page": "str_count_setdiff",
      "title": "Count the words in a vector that don't intersect with another vector (only counts unique words).",
      "topics": [
        "str_count_setdiff"
      ]
    },
    {
      "page": "str_counts",
      "title": "Create a list of a vector of unique words found in x and a vector of the counts of each word in x.",
      "topics": [
        "str_counts"
      ]
    },
    {
      "page": "str_dt_col_combine",
      "title": "Combine columns of a data.table into a list in a new column, wraps list(unlist(c(...)))",
      "topics": [
        "str_dt_col_combine"
      ]
    },
    {
      "page": "str_extract_match",
      "title": "Extract words from a vector that are found in another vector.",
      "topics": [
        "str_extract_match"
      ]
    },
    {
      "page": "str_extract_nomatch",
      "title": "Extract words from a vector that are not found in another vector.",
      "topics": [
        "str_extract_nomatch"
      ]
    },
    {
      "page": "str_extract_positional_match",
      "title": "Extract words from a vector that are found in the same position in another vector.",
      "topics": [
        "str_extract_positional_match"
      ]
    },
    {
      "page": "str_extract_positional_nomatch",
      "title": "Extract words from a vector that are not found in the same position in another vector.",
      "topics": [
        "str_extract_positional_nomatch"
      ]
    },
    {
      "page": "str_rm_blank_space",
      "title": "Remove and replace excess white space from strings.",
      "topics": [
        "str_rm_blank_space"
      ]
    },
    {
      "page": "str_rm_long_words",
      "title": "Remove words from a vector that have more than a maximum number of characters.",
      "topics": [
        "str_rm_long_words"
      ]
    },
    {
      "page": "str_rm_non_alphanumeric",
      "title": "Remove and replace non-alphanumeric characters from strings.",
      "topics": [
        "str_rm_non_alphanumeric"
      ]
    },
    {
      "page": "str_rm_non_printable",
      "title": "Remove and replace non-printable characters from strings.",
      "topics": [
        "str_rm_non_printable"
      ]
    },
    {
      "page": "str_rm_numbers",
      "title": "Remove and replace numbers from strings.",
      "topics": [
        "str_rm_numbers"
      ]
    },
    {
      "page": "str_rm_punctuation",
      "title": "Remove and replace punctuation from strings.",
      "topics": [
        "str_rm_punctuation"
      ]
    },
    {
      "page": "str_rm_regexp_match",
      "title": "Remove words from a vector that match a regular expression.",
      "topics": [
        "str_rm_regexp_match"
      ]
    },
    {
      "page": "str_rm_short_words",
      "title": "Remove words from a vector that don't have a minimum number of characters.",
      "topics": [
        "str_rm_short_words"
      ]
    },
    {
      "page": "str_rm_words",
      "title": "Remove words from a vector of words found in another vector of words.",
      "topics": [
        "str_rm_words"
      ]
    },
    {
      "page": "str_rm_words_by_length",
      "title": "Remove words from a vector based on the number of characters in each word.",
      "topics": [
        "str_rm_words_by_length"
      ]
    },
    {
      "page": "str_stopwords_by_part_of_speech",
      "title": "Create a vector of English words associated with particular parts of speech.",
      "topics": [
        "str_stopwords_by_part_of_speech"
      ]
    },
    {
      "page": "str_tolower",
      "title": "Calls base::tolower(), which converts letters to lowercase. Only included to point out that base::tolower exists and should be used directly.",
      "topics": [
        "str_tolower"
      ]
    },
    {
      "page": "str_weighted_count_match",
      "title": "Weighted count of the words in a vector that are found in another vector.",
      "topics": [
        "str_weighted_count_match"
      ]
    }
  ],
  "_rundeps": [
    "data.table"
  ],
  "_score": 1,
  "_indexed": true,
  "_nocasepkg": "texttools",
  "_universes": [
    "tconwell"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-16T06:22:56.000Z",
      "distro": "noble",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "e1de726268ec3fc3ad8f13d3e9ad0b0393b04a060134335983f684ab8a4394db",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-16T06:22:46.000Z",
      "distro": "noble",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "c6123d43b815c95a86c014ec91c1efdf97ce25dbb1023359802a73ba243f4e31",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-05-16T06:34:37.000Z",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "f347a5d896299b16e02822cc6d06a95c81585377c5c2f61ccb82386b6698939e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-05-16T06:32:48.000Z",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "18d3e7c9898dc648ce57b7915b3f6c4a7e4211f99ed65c6bf61f07af95515d1e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-05-16T06:22:12.000Z",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "c048dad491a5a3dbeca78e0a1bb6f1421fc3ccaee929c0f5f526e6289cdb9b6d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-05-16T06:22:28.000Z",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "ec584b626b4f68217e1e23dc3b4473c640c18974828b8f6d4aa7f5f6f7db94d2",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-05-16T06:22:31.000Z",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "a42a4b89821577b997e3830fd3711e1935cc64d929e7048faf66b4ce7e5a45d8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.1.0",
      "date": "2026-06-02T18:44:52.000Z",
      "commit": "83bcb2e07bf66ccc2e65075dad052de8287cac39",
      "fileid": "293753b963170ea0042cf11bfc6db4991a13ad683e0bcf17c6d3c964488c8f05",
      "status": "success",
      "buildurl": "https://github.com/r-universe/tconwell/actions/runs/25954810424"
    }
  ]
}