From 0d440fdd113e06e63077e94aadbb4865f9fbf90d Mon Sep 17 00:00:00 2001 From: Sebastian Kuzminsky Date: Sun, 20 Jul 2025 12:45:30 -0600 Subject: [PATCH 1/7] Issue::read_tags(): tags is now a directory with a file per tag This is more conflict resistant than the old encoding where tags was a file with a line per tag. This commit only handles the "fs/db => Issue" direction. --- src/issue.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/issue.rs b/src/issue.rs index 06f959f..81559f4 100644 --- a/src/issue.rs +++ b/src/issue.rs @@ -137,13 +137,7 @@ impl Issue { } else if file_name == "dependencies" && direntry.metadata()?.is_dir() { dependencies = Self::read_dependencies(&direntry.path())?; } else if file_name == "tags" { - let contents = std::fs::read_to_string(direntry.path())?; - tags = contents - .lines() - .filter(|s| s.len() > 0) - .map(|tag| String::from(tag.trim())) - .collect(); - tags.sort(); + tags = Self::read_tags(&direntry)?; } else if file_name == "comments" && direntry.metadata()?.is_dir() { Self::read_comments(&mut comments, &direntry.path())?; } else { @@ -525,6 +519,21 @@ impl Issue { Ok(()) } + fn read_tags(tags_direntry: &std::fs::DirEntry) -> Result, IssueError> { + if !tags_direntry.metadata()?.is_dir() { + eprintln!("issue has old-style tags file"); + return Err(IssueError::IssueParseError); + } + let mut tags = Vec::::new(); + for direntry in tags_direntry.path().read_dir()? 
#!/bin/bash
#
# Migrate an ent database from old-style `tags` files (one tag per line)
# to new-style `tags` directories (one empty file per tag).
#
# Usage: update-tags-encoding [DIRECTORY | BRANCH]
#
# If the argument is an existing directory, the ent db in that directory
# is updated in place and nothing is committed.
#
# Otherwise the argument is taken to be a branch name (default
# `entomologist-data`).  The branch is checked out in a temporary
# worktree and, for each issue with a `tags` file:
#
#     replace the old-style tags file with a new-style tags dir
#
#     git commit

set -e
#set -x

BRANCH=""

if [[ -n "$1" ]] && [[ -d "$1" ]]; then
    echo "updating ent db in directory '$1'"
    pushd "$1" > /dev/null
else
    if [[ -n "$1" ]]; then
        # better be a branch
        BRANCH="$1"
    else
        BRANCH="entomologist-data"
    fi
    echo "updating ent db in branch '${BRANCH}'"
    WORKTREE_DIR=$(mktemp --directory)
    git worktree add "${WORKTREE_DIR}" "${BRANCH}"
    pushd "${WORKTREE_DIR}" > /dev/null
fi

# Now our current working directory is the ent db that we're supposed
# to update.
#
# If $BRANCH is empty, we're in a directory not tracked by git and we
# just change the files.
#
# If $BRANCH is not empty, we're in a git worktree of the branch we're
# supposed to change, so we commit as we go.

# Issue directories are named by 32 lowercase hex digits, so the output
# of `find` contains nothing that word splitting could break apart.
for ISSUE_ID in $(find . -maxdepth 1 -type d -regextype posix-extended -regex '\./[0-9a-f]{32}'); do
    ISSUE_ID=$(basename "${ISSUE_ID}")
    if ! [[ -f "${ISSUE_ID}/tags" ]]; then
        continue
    fi

    pushd "${ISSUE_ID}" > /dev/null

    # Read the old-style file the way the old parser did: one tag per
    # line, surrounding whitespace trimmed, empty lines skipped.  Reading
    # line by line (instead of word-splitting the whole file) keeps tags
    # that contain spaces or glob characters intact.  The `|| [[ -n ... ]]`
    # picks up a final line that has no trailing newline.
    TAGS=()
    while IFS= read -r TAG || [[ -n "${TAG}" ]]; do
        TAG="${TAG#"${TAG%%[![:space:]]*}"}"    # strip leading whitespace
        TAG="${TAG%"${TAG##*[![:space:]]}"}"    # strip trailing whitespace
        if [[ -n "${TAG}" ]]; then
            TAGS+=("${TAG}")
        fi
    done < tags

    echo "${ISSUE_ID} has tags:"
    printf '%s\n' "${TAGS[@]}"

    # Remove the old-style file; in a worktree also stage the removal.
    if [[ -n "${BRANCH}" ]]; then
        git rm -f tags
    else
        rm tags
    fi

    # NOTE: a tag containing '/' is not a valid single file name, so
    # `touch` fails for it and `set -e` aborts the run.
    mkdir tags
    for TAG in "${TAGS[@]}"; do
        touch "tags/${TAG}"
    done

    if [[ -n "${BRANCH}" ]]; then
        git add tags
        git commit -m "issue ${ISSUE_ID}: update tags to new format"
    fi

    popd > /dev/null
done

popd > /dev/null

if [[ -n "${BRANCH}" ]]; then
    git worktree remove "${WORKTREE_DIR}"
fi
b/test/0000/3943fc5c173fdf41c0a22251593cd476/tags/tag1 new file mode 100644 index 0000000..e69de29 From ee269e212e94ee30edafa822906a07342e759244 Mon Sep 17 00:00:00 2001 From: Sebastian Kuzminsky Date: Wed, 23 Jul 2025 20:23:03 -0600 Subject: [PATCH 4/7] refactor Issue::read_tags() to handle escaping --- src/issue.rs | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/src/issue.rs b/src/issue.rs index 81559f4..6a7abe9 100644 --- a/src/issue.rs +++ b/src/issue.rs @@ -48,6 +48,10 @@ pub enum IssueError { ChronoParseError(#[from] chrono::format::ParseError), #[error("Failed to parse issue")] IssueParseError, + #[error("invalid escape character {escape:?} in tag file {filename:?}")] + TagInvalidEscape { escape: String, filename: String }, + #[error("invalid trailing escape character ',' in tag file {filename:?}")] + TagTrailingEscape { filename: String }, #[error("Failed to parse state")] StateParseError, #[error("Failed to run git")] @@ -527,13 +531,52 @@ impl Issue { let mut tags = Vec::::new(); for direntry in tags_direntry.path().read_dir()? 
{ if let Ok(direntry) = direntry { - tags.push(String::from(direntry.file_name().to_string_lossy())); + let tag = Issue::tag_from_filename(&direntry.file_name().to_string_lossy())?; + tags.push(tag); } } tags.sort(); Ok(tags) } + /// Perform un-escape on a filename to make it into a tag: + /// ",0" => "," + /// ",1" => "/" + fn tag_from_filename(filename: &str) -> Result { + let mut tag = String::new(); + let mut token_iter = filename.split(','); + let Some(start) = token_iter.next() else { + return Err(IssueError::StdIoError(std::io::Error::from( + std::io::ErrorKind::NotFound, + ))); + }; + tag.push_str(start); + for token in token_iter { + match token.chars().nth(0) { + Some('0') => { + tag.push(','); + tag.push_str(&token[1..]); + } + Some('1') => { + tag.push('/'); + tag.push_str(&token[1..]); + } + Some(bogus) => { + return Err(IssueError::TagInvalidEscape { + escape: String::from(bogus), + filename: String::from(filename), + }); + } + None => { + return Err(IssueError::TagTrailingEscape { + filename: String::from(filename), + }); + } + } + } + Ok(tag) + } + fn commit_tags(&self, commit_message: &str) -> Result<(), IssueError> { let mut tags_filename = self.dir.clone(); tags_filename.push("tags"); @@ -560,6 +603,64 @@ mod tests { use super::*; use pretty_assertions::assert_eq; + #[test] + fn parse_tag_0() { + assert_eq!( + Issue::tag_from_filename("hello").unwrap(), + String::from("hello") + ); + } + + #[test] + fn parse_tag_1() { + assert_eq!( + Issue::tag_from_filename("hello,0world").unwrap(), + String::from("hello,world") + ); + } + + #[test] + fn parse_tag_2() { + assert_eq!( + Issue::tag_from_filename("hello,1world").unwrap(), + String::from("hello/world") + ); + } + + #[test] + fn parse_tag_3() { + assert_eq!( + Issue::tag_from_filename(",0hello,1world,0").unwrap(), + String::from(",hello/world,") + ); + } + + #[test] + fn parse_tag_4() { + // std::io::Error does not impl PartialEq :-( + let filename = "hello,"; + match 
Issue::tag_from_filename(filename) { + Ok(tag) => panic!( + "tag_from_filename() accepted invalid input {:?} and returned {:?}", + filename, tag + ), + Err(_e) => (), + } + } + + #[test] + fn parse_tag_5() { + // std::io::Error does not impl PartialEq :-( + let filename = "hello,world"; + match Issue::tag_from_filename(filename) { + Ok(tag) => panic!( + "tag_from_filename() accepted invalid input {:?} and returned {:?}", + filename, tag + ), + Err(_e) => (), + } + } + #[test] fn read_issue_0() { let issue_dir = std::path::Path::new("test/0000/3943fc5c173fdf41c0a22251593cd476/"); From 9db4e6bef86485ccfc1899443e61850456e18269 Mon Sep 17 00:00:00 2001 From: Sebastian Kuzminsky Date: Wed, 23 Jul 2025 20:24:42 -0600 Subject: [PATCH 5/7] add some tags with escapes to the tests --- src/issue.rs | 5 +++++ src/issues.rs | 5 +++++ test/0000/3943fc5c173fdf41c0a22251593cd476/tags/bird,1wing | 0 .../tags/bird,1wing,1feather | 0 test/0000/3943fc5c173fdf41c0a22251593cd476/tags/deer,0antler | 0 .../tags/deer,0antler,0tassle | 0 .../3943fc5c173fdf41c0a22251593cd476/tags/hop,0scotch,1shoe | 0 7 files changed, 10 insertions(+) create mode 100644 test/0000/3943fc5c173fdf41c0a22251593cd476/tags/bird,1wing create mode 100644 test/0000/3943fc5c173fdf41c0a22251593cd476/tags/bird,1wing,1feather create mode 100644 test/0000/3943fc5c173fdf41c0a22251593cd476/tags/deer,0antler create mode 100644 test/0000/3943fc5c173fdf41c0a22251593cd476/tags/deer,0antler,0tassle create mode 100644 test/0000/3943fc5c173fdf41c0a22251593cd476/tags/hop,0scotch,1shoe diff --git a/src/issue.rs b/src/issue.rs index 6a7abe9..fb05cb6 100644 --- a/src/issue.rs +++ b/src/issue.rs @@ -674,6 +674,11 @@ mod tests { done_time: None, tags: Vec::::from([ String::from("TAG2"), + String::from("bird/wing"), + String::from("bird/wing/feather"), + String::from("deer,antler"), + String::from("deer,antler,tassle"), + String::from("hop,scotch/shoe"), String::from("i-am-also-a-tag"), String::from("tag1"), ]), diff --git 
/// Perform escape on a tag to make it into a filename:
///
/// * `,` => `,0`
/// * `/` => `,1`
///
/// Every other character is copied through unchanged.  The tag is walked
/// once, so the commas introduced by escaping are never escaped again.
fn tag_to_filename(tag: &str) -> String {
    let mut filename = String::with_capacity(tag.len());
    for c in tag.chars() {
        match c {
            ',' => filename.push_str(",0"),
            '/' => filename.push_str(",1"),
            other => filename.push(other),
        }
    }
    filename
}
# Escape a tag so it can be used as a single file name in a `tags` dir:
#
#     "," => ",0"
#     "/" => ",1"
#
# $1 is the tag; the escaped tag is printed on stdout.
function escape_tag() {
    local tag="$1"
    # Escape ',' first: the '/' replacement below introduces new commas
    # that must not be escaped a second time.
    tag="${tag//,/,0}"
    tag="${tag//\//,1}"
    # printf rather than echo, so a tag like "-n" or "-e" is printed
    # verbatim instead of being taken as an option.
    printf '%s\n' "${tag}"
}