new: Script to transform an Azure DevOps export CSV into an importable CSV for work items.
commit 02bcb22c5c

3 changed files with 180 additions and 0 deletions

38  README.md  Normal file

@@ -0,0 +1,38 @@
# Python Azure DevOps Work Item CSV

## Run Example

```bash
# 1 Create and activate an isolated Python environment
python3 -m venv .venv
source .venv/bin/activate

# 2 Install runtime dependencies
python -m pip install --upgrade pip
python -m pip install -r requirements.pip

# 3 Convert the CSV
python reorder_requirements.py "input.csv" "output.csv"

# Same conversion with INFO-level logging enabled
python reorder_requirements.py -v "/Users/jj/Downloads/input.csv" "/Users/jj/Downloads/output.csv"
```

## Execution Logic Summary

1. **`build_level_map`** walks up the `Parent` chain to assign every row its depth (1, 2, 3, …).
2. **`depth_first_order`** produces a depth-first sequence of IDs so that each parent is immediately followed by its children (and grandchildren).
3. **`restructure`** creates the empty `ID` column plus the mutually exclusive `Title 1`–`Title 3` columns and appends every *other* original column (except the old `ID`, `Parent`, and `Title`).
4. The script writes the new CSV; nothing is printed unless you pass `-v`, in which case a final “Success - wrote … rows” confirmation is logged.
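
For illustration, a hypothetical three-item hierarchy (the IDs, titles, and `State` column below are invented) transforms like this:

**Input**

| ID | Parent | Title   | State  |
| -- | ------ | ------- | ------ |
| 1  |        | Epic A  | New    |
| 2  | 1      | Story B | Active |
| 3  | 2      | Task C  | New    |

**Output**

| ID | Title 1 | Title 2 | Title 3 | State  |
| -- | ------- | ------- | ------- | ------ |
|    | Epic A  |         |         | New    |
|    |         | Story B |         | Active |
|    |         |         | Task C  | New    |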

You’ll get a file whose first four columns are:

| ID | Title 1 | Title 2 | Title 3 | …other original fields… |
| -- | ------- | ------- | ------- | ----------------------- |

where:

* Top-level items fill **Title 1**.
* Second-level items fill **Title 2**.
* Third-level (and deeper) items fill **Title 3**.

All hierarchy constraints you specified are enforced automatically. Feel free to adapt the column names or add CLI switches if you ever need variations (e.g., different max depth).
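
If you'd rather drive the transformation from your own code than from the CLI, here is a minimal sketch (it assumes `reorder_requirements.py` is importable from your working directory; the file names are placeholders):

```python
import pandas as pd

from reorder_requirements import restructure, sanity_check

df_in = pd.read_csv("input.csv")         # needs ID, Parent, and Title columns
df_out, other_cols = restructure(df_in)  # depth-first order plus Title 1-3 split
sanity_check(df_in, df_out, other_cols)  # raises ValueError on any mismatch
df_out.to_csv("output.csv", index=False)
```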

141  reorder_requirements.py  Normal file

@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
Re-orders a hierarchical CSV and rewrites it for Azure DevOps Boards import.

Adds:
• Rich INFO-level logging of every major step
• A sanity-check ensuring the output is
  1) complete   - same number of rows, no extras or losses
  2) correct    - every non-hierarchy cell identical to the input
  3) functional - has required columns for ADO import, in the right order
"""

from __future__ import annotations

import argparse
import hashlib
import logging
from collections import defaultdict

import pandas as pd


def _hash_row(row: pd.Series) -> str:
    """Stable hash of a Series - used to compare multi-column equality regardless of row order."""
    txt = "||".join(str(v) for v in row.tolist())
    return hashlib.md5(txt.encode("utf-8")).hexdigest()
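
# For illustration: _hash_row(pd.Series(["Done", 5])) hashes the string
# "Done||5", so identical rows always produce identical digests.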


def build_level_map(df: pd.DataFrame) -> dict[int, int]:
    """Map every ID to its depth in the Parent hierarchy (roots have depth 1)."""
    log = logging.getLogger("hierarchy")
    level_cache: dict[int, int] = {}

    def level_of(_id: int) -> int:
        if _id in level_cache:
            return level_cache[_id]
        parent = df.loc[df["ID"] == _id, "Parent"].iloc[0]
        if pd.isna(parent):
            level_cache[_id] = 1
        else:
            level_cache[_id] = 1 + level_of(int(parent))
        return level_cache[_id]

    for _id in df["ID"]:
        level_of(int(_id))
    log.info("Calculated depth for %d items", len(level_cache))
    return level_cache
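
# For illustration: rows (ID=1, Parent=NaN), (ID=2, Parent=1), (ID=3, Parent=2)
# give build_level_map(...) == {1: 1, 2: 2, 3: 3}.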


def depth_first_order(df: pd.DataFrame) -> list[int]:
    """Return all IDs in depth-first order, so each parent precedes its whole subtree."""
    children: defaultdict[int, list[int]] = defaultdict(list)
    for _, row in df.iterrows():
        if not pd.isna(row["Parent"]):
            children[int(row["Parent"])].append(int(row["ID"]))

    ordered: list[int] = []

    def visit(node_id: int) -> None:
        ordered.append(node_id)
        for child_id in children.get(node_id, []):
            visit(child_id)

    for root_id in df[pd.isna(df["Parent"])]["ID"]:
        visit(int(root_id))
    logging.getLogger("hierarchy").info("Produced depth-first order of %d IDs", len(ordered))
    return ordered
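
# For illustration: IDs [1, 2, 3, 4] with Parents [NaN, 1, 1, 2] come out as
# [1, 2, 4, 3] - ID 4 follows its parent 2 before 2's sibling 3.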


def restructure(df: pd.DataFrame) -> tuple[pd.DataFrame, list[str]]:
    """Re-order rows depth-first and split Title into level-specific columns."""
    level_map = build_level_map(df)
    ordered_ids = depth_first_order(df)

    df_sorted = df.set_index("ID").loc[ordered_ids].reset_index()
    df_sorted["_level"] = df_sorted["ID"].map(level_map)

    # New ADO-style columns
    df_sorted.insert(0, "ID_new", "")
    df_sorted["Title 1"] = df_sorted.apply(lambda r: r["Title"] if r["_level"] == 1 else "", axis=1)
    df_sorted["Title 2"] = df_sorted.apply(lambda r: r["Title"] if r["_level"] == 2 else "", axis=1)
    df_sorted["Title 3"] = df_sorted.apply(lambda r: r["Title"] if r["_level"] >= 3 else "", axis=1)

    cols_to_drop = {"ID", "Parent", "Title", "_level"}
    other_cols = [c for c in df_sorted.columns if c not in cols_to_drop | {"ID_new", "Title 1", "Title 2", "Title 3"}]

    final_df = df_sorted[["ID_new", "Title 1", "Title 2", "Title 3", *other_cols]]
    final_df = final_df.rename(columns={"ID_new": "ID"})
    logging.getLogger("transform").info("Restructured to %d columns (%s)", len(final_df.columns), ", ".join(final_df.columns))
    return final_df, other_cols
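
# For illustration: input columns [ID, Parent, Title, State, Tags] come back as
# [ID, Title 1, Title 2, Title 3, State, Tags], with other_cols == ["State", "Tags"].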


def sanity_check(df_in: pd.DataFrame, df_out: pd.DataFrame, other_cols: list[str]) -> None:
    log = logging.getLogger("check")

    # 1) complete ──────────────────────────────────────────────────────────────
    if len(df_in) != len(df_out):
        raise ValueError(f"Row count mismatch – input:{len(df_in)} vs output:{len(df_out)}")
    log.info("Completeness ✔ %d rows", len(df_in))

    # If there are no extra data-bearing columns, skip the correctness test
    if not other_cols:
        log.info("Correctness ✔ (no non-hierarchy columns present)")
    else:
        # 2) correct ───────────────────────────────────────────────────────────
        in_hashes = df_in[other_cols].apply(_hash_row, axis=1).value_counts().sort_index()
        out_hashes = df_out[other_cols].apply(_hash_row, axis=1).value_counts().sort_index()

        if not in_hashes.equals(out_hashes):
            # fill_value=0 keeps hashes present on only one side from vanishing
            # as NaN before we count the differing rows
            diff = in_hashes.subtract(out_hashes, fill_value=0).replace(0, pd.NA).dropna()
            raise ValueError(f"Data mismatch detected in {len(diff)} row(s) – hashes differ")
        log.info("Correctness ✔ all %d non-hierarchy cells identical", len(df_in) * len(other_cols))

    # 3) functional ───────────────────────────────────────────────────────────
    required_cols = ["ID", "Title 1", "Title 2", "Title 3"]
    if df_out.columns.tolist()[:4] != required_cols:
        raise ValueError("Required ADO columns missing or out of order")
    if not df_out["ID"].eq("").all():
        raise ValueError("The first column 'ID' must be empty for ADO import")

    log.info("Functional ✔ output format matches Azure DevOps Boards import requirements")


def main() -> None:
    argp = argparse.ArgumentParser(description="Re-order hierarchical CSV for ADO Boards")
    argp.add_argument("input_csv")
    argp.add_argument("output_csv")
    argp.add_argument("-v", "--verbose", action="store_true", help="log INFO messages")
    args = argp.parse_args()

    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        format="%(levelname)s %(name)s: %(message)s",
    )

    logging.info("Reading %s …", args.input_csv)
    df_in = pd.read_csv(args.input_csv)

    df_out, other_cols = restructure(df_in)
    sanity_check(df_in, df_out, other_cols)

    df_out.to_csv(args.output_csv, index=False)
    logging.info("Success - wrote %d rows to %s", len(df_out), args.output_csv)


if __name__ == "__main__":
    main()

1  requirements.pip  Normal file

@@ -0,0 +1 @@
pandas>=2.2,<3.0