This article starts where we have stopped last time - I have introduced a set of Ada utilities for computing the vpatch dependencies, performing a topological sort on them, and simplifying the flow for making a nicer graph - which made v.sh a complete tool. However, I was quite unsatisfied with the amount of code duplication between the utilities: first, the ~same set of type and variable declarations was used in all the tools; second, the fact that the variables were constantly serialized and deserialized made the code look too messy and unfinished to leave it like that. In the first vpatch (part 2.5), I will unify the code of all the v.sh utilities in one binary that does all computations end-to-end. As the algorithms/functions did not change much, I will not add a detailed description of them in this article, if you are interested, they are available in the previous article.
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
with Ada.Command_Line; use Ada.Command_Line;
with Ada.Containers.Generic_Sort;
procedure Vflow is
type Hunk_State is (Unsatisfied, Satisfied);
Empty_Hash : constant := 0;
type Hunk is record
Vpatch: Natural;
In_File: Natural;
Out_File: Natural;
In_Hash: Natural;
Out_Hash: Natural;
State: Hunk_State;
end record;
Num_Hunks : Integer := Integer'Value(Argument(1));
Num_Vpatches : Integer := Integer'Value(Argument(2));
subtype Hunk_Index is Natural range 1..Num_Hunks;
type Hunk_Array is array (Hunk_Index range <>) of Hunk;
All_Hunks : Hunk_Array(Hunk_Index'Range);
subtype Vpatch_Index is Natural range 0..Num_Vpatches - 1;
type Vpatch_Hunks is array (Vpatch_Index'Range) of Hunk_Index;
VH_Low : Vpatch_Hunks := (others => Hunk_Index'First);
VH_High: Vpatch_Hunks := (others => Hunk_Index'First);
type Vpatch_Set is array (Vpatch_Index'Range) of Boolean;
type Vpatch_List is array (Vpatch_Index'Range) of Vpatch_Index;
type Vpatch_Relationship is array (Vpatch_Index'Range, Vpatch_Index'Range) of Boolean;
Vpatch_Dependencies: Vpatch_Relationship := (others => (others => False));
Vpatch_Conflicts : Vpatch_Relationship := (others => (others => False));
Vpatch_Order : Vpatch_List := (others => 0);
function Get_Hunk return Hunk is
H: Hunk;
begin
Get(H.Vpatch);
Get(H.In_File);
Get(H.Out_File);
Get(H.In_Hash);
Get(H.Out_Hash);
if H.In_Hash = Empty_Hash then
H.State := Satisfied;
else
H.State := Unsatisfied;
end if;
return H;
end Get_Hunk;
procedure Read_Hunks is
I: Natural := All_Hunks'First;
begin
Read_Loop:
for I in All_Hunks'Range loop
exit Read_Loop when End_Of_File;
All_Hunks(I) := Get_Hunk;
end loop Read_Loop;
if I /= All_Hunks'Last then
raise Constraint_Error with "Insufficient number of hunks provided.";
end if;
end Read_Hunks;
procedure Init_Vpatch_Hunk_Bounds is
begin
for I in All_Hunks'Range loop
declare
V : Integer := All_Hunks(I).Vpatch;
begin
if VH_Low(V) > I or All_Hunks(VH_Low(V)).Vpatch /= V then
VH_Low(V) := I;
end if;
VH_High(V) := I;
end;
end loop;
end Init_Vpatch_Hunk_Bounds;
procedure Populate_Conflicts is
function Conflicts(I: Vpatch_Index; J: Vpatch_Index) return Boolean is
begin
for A in VH_Low(I)..VH_High(I) loop
for B in VH_Low(J)..VH_High(J) loop
if (All_Hunks(A).In_File = All_Hunks(B).In_File) and
(All_Hunks(A).In_Hash = All_Hunks(B).In_Hash) then
return True;
end if;
end loop;
end loop;
return False;
end Conflicts;
begin
for I in Vpatch_Index'Range loop
for J in Vpatch_Index'Range loop
if I < J and then Conflicts(I,J) then
Vpatch_Conflicts(I,J) := True;
Vpatch_Conflicts(J,I) := True;
end if;
end loop;
end loop;
end Populate_Conflicts;
procedure Solve is
Input_Hunks : Hunk_Array := All_Hunks;
Output_Hunks: Hunk_Array renames All_Hunks;
Finished : Boolean := False;
-- Start of sorting boilerplate
--
function Input_Before(A: Positive; B: Positive) return Boolean is
begin
return Input_Hunks(A).In_File < Input_Hunks(B).In_File;
end Input_Before;
function Output_Before(A: Positive; B: Positive) return Boolean is
begin
return Output_Hunks(A).Out_File < Output_Hunks(B).Out_File;
end Output_Before;
procedure Input_Swap(A: Positive; B: Positive) is
Tmp : Hunk;
begin
Tmp := Input_Hunks(B);
Input_Hunks(B) := Input_Hunks(A);
Input_Hunks(A) := Tmp;
end Input_Swap;
procedure Output_Swap(A: Positive; B: Positive) is
Tmp : Hunk;
begin
Tmp := Output_Hunks(B);
Output_Hunks(B) := Output_Hunks(A);
Output_Hunks(A) := Tmp;
end Output_Swap;
procedure Input_Sort is
new Ada.Containers.Generic_Sort (Positive, Input_Before, Input_Swap);
procedure Output_Sort is
new Ada.Containers.Generic_Sort (Positive, Output_Before, Output_Swap);
--
-- End of sorting boilerplate
function Check_Genesis(Input: in out Hunk_Array) return Boolean is
Is_Genesis : Boolean := True;
Vpatch: Vpatch_Index := Input(Input'First).Vpatch;
begin
for I in Input'Range loop
Is_Genesis := Is_Genesis and (Input(I).In_Hash = Empty_Hash);
end loop;
return Is_Genesis;
end Check_Genesis;
function Try_Connect(Input: in out Hunk_Array;
Output: in out Hunk_Array) return Boolean is
I : Positive := Input'First;
J : Positive := Output'First;
All_Marked: Boolean := True;
begin
while I <= Input'Last and J <= Output'Last loop
if Input(I).State /= Unsatisfied or
Input(I).In_File < Output(J).Out_File then
I := I + 1;
elsif Input(I).In_File > Output(J).Out_File then
J := J + 1;
else
if Input(I).In_Hash = Output(J).Out_Hash and
Input(I).In_Hash /= Empty_Hash then
Input(I).State := Satisfied;
Vpatch_Dependencies(Input(Input'First).Vpatch,
Output(Output'First).Vpatch) := True;
end if;
I := I + 1;
end if;
end loop;
for I in Input'Range loop
All_Marked := All_Marked and Input(I).State = Satisfied;
end loop;
return All_Marked;
end Try_Connect;
begin
for I in Vpatch_Index'Range loop
Output_Sort(VH_Low(I), VH_High(I));
Input_Sort(VH_Low(I), VH_High(I));
end loop;
for I in Vpatch_Index'Range loop
declare
Hunks_In : Hunk_Array renames Input_Hunks(VH_Low(I)..VH_High(I));
begin
Finished := False;
for J in Vpatch_Index'Range loop
if J /= I then
declare
Hunks_Out : Hunk_Array renames
Output_Hunks(VH_Low(J)..VH_High(J));
begin
Finished := Try_Connect(Hunks_In, Hunks_Out);
end;
else
Finished := Check_Genesis(Hunks_In);
end if;
exit when Finished;
end loop;
-- Looped through all vpatches, there are hunks without matched inputs:
if not Finished then
raise Constraint_Error with "Unsatisfied hunks.";
end if;
end;
end loop;
end Solve;
function Sort return Boolean is
Free_Vpatches : Vpatch_Set := (others => True);
N_Selected : Natural := 0;
Finished : Boolean := False;
Has_Loops : Boolean := False;
begin
while not Finished loop
Finished := True;
for I in Free_Vpatches'Range loop
if Free_Vpatches(I) then
declare
All_Satisfied : Boolean := True;
begin
-- Check if all dependencies are already in the press order
for J in Vpatch_Dependencies'Range(2) loop
if Vpatch_Dependencies(I,J) then
All_Satisfied := All_Satisfied and not Free_Vpatches(J);
end if;
end loop;
-- All dependencies present, can add this vpatch:
if All_Satisfied then
Free_Vpatches(I) := False;
Vpatch_Order(N_Selected) := I;
N_Selected := N_Selected + 1;
Finished := False;
end if;
end;
end if;
end loop;
end loop;
-- Check if there are still free vpatches (there is a loop):
for I in Free_Vpatches'Range loop
if Free_Vpatches(I) then
Put_Line("L " & Integer'Image(I));
Has_Loops := True;
end if;
end loop;
return Has_Loops;
end Sort;
procedure Print_Relationship(C: Character; R: Vpatch_Relationship) is
begin
for I in R'Range(1) loop
for J in R'Range(2) loop
if R(I,J) then
Put_Line(C & " " & Integer'Image(I) & " " & Integer'Image(J));
end if;
end loop;
end loop;
end Print_Relationship;
procedure Simplify_Inner is
Vpatch_TC_Indirect: Vpatch_Relationship := (others => (others => False));
begin
-- Fill Vpatch_TC_Indirect
for I in Vpatch_Order'Range loop
declare
N : Vpatch_Index := Vpatch_Order(I);
begin
for J in Vpatch_Dependencies'Range(2) loop
if Vpatch_Dependencies(N,J) then
for K in Vpatch_Dependencies'Range(2) loop
Vpatch_TC_Indirect(N,K) := Vpatch_TC_Indirect(N,K) or
Vpatch_Dependencies(J,K) or
Vpatch_TC_Indirect(J,K);
end loop;
end if;
end loop;
end;
end loop;
-- Output Vpatch_TC_Indirect
Print_Relationship('I', Vpatch_TC_Indirect);
-- Remove redundant connections from Vpatch_Dependencies
for I in Vpatch_Dependencies'Range(1) loop
for J in Vpatch_TC_Indirect'Range(2) loop
Vpatch_Dependencies(I,J) := Vpatch_Dependencies(I,J) and
not Vpatch_TC_Indirect(I,J);
end loop;
end loop;
-- Output filtered out Vpatch_Dependencies
Print_Relationship('d', Vpatch_Dependencies);
end Simplify_Inner;
procedure Print_Selected is
Selected_Vpatches: Vpatch_Set := (others => False);
procedure Read_Selection is
C: Character;
I: Vpatch_Index;
begin
Read_Loop:
loop
exit Read_Loop when End_Of_File;
Get(C);
if C = 's' then
Get(I);
Selected_Vpatches(I) := True;
elsif C = 'S' then
for J in Selected_Vpatches'Range loop
Selected_Vpatches(J) := True;
end loop;
end if;
end loop Read_Loop;
end Read_Selection;
procedure Add_Dependencies is
Finish : Boolean := False;
begin
while not Finish loop
Finish := True;
for I in Selected_Vpatches'Range loop
if Selected_Vpatches(I) then
for J in Vpatch_Dependencies'Range(2) loop
if Vpatch_Dependencies(I,J) and not Selected_Vpatches(J) then
Finish := False;
Selected_Vpatches(J) := True;
end if;
end loop;
end if;
end loop;
end loop;
end Add_Dependencies;
procedure Print_Flow_Or_Conflicts is
Has_Conflicts : Boolean := False;
begin
for I in Vpatch_Conflicts'Range(1) loop
for J in Vpatch_Conflicts'Range(2) loop
if Selected_Vpatches(I) and Selected_Vpatches(J)
and Vpatch_Conflicts(I,J) then
Put_Line("C " & Integer'Image(I));
Put_Line("C " & Integer'Image(J));
Has_Conflicts := True;
end if;
end loop;
end loop;
if Has_Conflicts then
return;
end if;
for I in Vpatch_Order'Range loop
if Selected_Vpatches(Vpatch_Order(I)) then
Put_Line("P " & Integer'Image(Vpatch_Order(I)));
end if;
end loop;
end Print_Flow_Or_Conflicts;
begin
Read_Selection;
Add_Dependencies;
Print_Flow_Or_Conflicts;
end Print_Selected;
begin
Read_Hunks;
Init_Vpatch_Hunk_Bounds;
Populate_Conflicts;
Solve;
if Sort then
Simplify_Inner;
Print_Selected;
end if;
end Vflow;
I used this opportunity to corrected a few sore spots in the algorithms:
- Handling of file creations in vpatches is simplified.
- "S" command selects all vpatches for toposort calculations. This simplifies awk part of v.sh a bit.
- The dependency graph is always simplified.
- Instead of toposorting only the selected vpatches and their antecedents, now the whole vtree is toposorted, and then user-chosen vpatches with their antecedents are selected from the flow.
The end result of these changes is significant economy of code (~390 instead of 499 LoC - around 10%), and less piping here-and-there on the file system.
The corresponding changes to v.sh:
--- a/vtools/v.sh 24defb473835e2980044a9bb526d2133f3056f62109b3a8136cb168019889f3c94bb721362cffcdb6df2ac52bbc6c111a680d9ba22b759ea676e7ddff7082fa1
+++ b/vtools/v.sh 18e17b243c223f806fa1d9462e096ee2f56d5bdcc2714fff374518bf1ee5fb0fe830720006739b1ed987beeaf4116fd079a7db6a3f900e4b0cba7b0e5847395c
@@ -4,10 +4,8 @@
vgpgdir=""
vpatches=""
-flow=""
filtered=""
selfile=""
-simplified=""
topo=""
usage() {
@@ -34,10 +32,8 @@
loadwot() {
vgpgdir="$(mktemp -d v-gpg-XXXXXX)"
vpatches="$vgpgdir/vpatches.txt"
- flow="$vgpgdir/flow.txt"
filtered="$vgpgdir/filtered.txt"
selfile="$vgpgdir/selected.txt"
- simplified="$vgpgdir/simplified.txt"
topo="$vgpgdir/topo.txt"
checkdir wot
$flow and $simplified temporary files are eliminated, as now the dependency graph is always simplified.
@@ -130,30 +126,34 @@
filterall | internall >"$filtered"
selected=""
if [ "$*" == "-" ]; then
- selected=$(awk '{print $2}' < "$vpatches")
+ echo "S" >"$selfile"
else
- selected="$(echo $* | tr ' ' 'n' | vpatchnos)"
+ echo $* | tr ' ' 'n' | sed -e 's/^/s /g' - | vpatchnos >"$selfile"
fi
- vflow $(wc -l <"$filtered") $(wc -l <"$vpatches") <"$filtered" >"$flow"
- echo "$selected" | sed -e 's/^/s /g' - | sort | uniq >"$selfile"
- cat "$flow" "$selfile" | vtoposort > "$topo"
+ cat "$filtered" "$selfile" |
+ vflow $(wc -l <"$filtered") $(wc -l <"$vpatches") >"$topo"
}
When all leaves are selected for flow calculation, a new "S" command is emitted. Also, instead of vflow | vtoposort pipeline, only vflow is used.
vpatchflowerr() {
- ret=$(head -n1 "$topo" | awk '{print $1}')
- if [ "$ret" = "C" ]; then
- printf "nConflicting leaves selected:n"
- awk '$1 == "C" {print $2}' < "$topo" | vpatchnames | sort | uniq
- cleanup
- elif [ $ret == "L" ]; then
- echo "nLoop among vpatches:"
- awk '{print $2}' < "$topo" | vpatchnames
- cleanup
- elif [ "$ret" != "P" ]; then
- echo "Unknown error. Investigate in $vgpgdir:"
- cat "$topo"
- exit 0 # no cleanup
- fi
+ awk 'BEGIN {err=0; ok=0;}
+err == 0 && $1 == "C" {err=1; ech=$1;}
+err == 0 && $1 == "L" {err=2; ech=$1;}
+err == 0 && $1 == "P" {ok=1;}
+END {if (err != 0) exit err; if (ok != 1) exit 3; exit 0;}' <"$topo"
+ exitcode="$?"
+ if [ "$exitcode" -eq "1" ]; then
+ printf "nConflicting leaves selected:n"
+ awk '$1 == "C" {print $2}' < "$topo" | vpatchnames | sort | uniq
+ cleanup
+ elif [ "$exitcode" -eq "2" ]; then
+ printf "nConflicting leaves selected:n"
+ awk '{print $2}' < "$topo" | vpatchnames
+ cleanup
+ elif [ "$exitcode" -eq "3" ]; then
+ echo "Unknown error. Investigate in $vgpgdir:"
+ cat "$topo"
+ exit 0 # no cleanup
+ fi
}
[ -z "$1" ] && usage
The new error handling routine scans the result of vflow for error or success conditions: if conflicts or loops among vpatches are detected, those are reported. If no press order is calculated for whatever reason, this is also reported.
@@ -180,7 +180,6 @@ shift vpatchflow - - cat "$flow" "$topo" | vsimplify > "$simplified" cat >"$dotfile" <""a[$2]"""} -' "$vpatches" - <"$simplified" >>"$dotfile" +' "$vpatches" - <"$topo" >>"$dotfile" echo "}" >> "$dotfile" dot -Tsvg $dotfile -o $svgfile # dot -Tcmapx $dotfile -o $svgfile.html-map
There is no need to run vsimplify anymore, as output is always simplified.
@@ -202,11 +201,10 @@
vpatchflow -
vno=$(awk -v V="$vpatch" '$1==V {print $2}' < "$vpatches")
- cat "$flow" "$topo" | vsimplify > "$simplified"
- awk -v S="$simplified" -v T="$topo" -v N="$vno" '
-FILENAME==S && ($1 == "I" || $1 == "d" ) && $2 == N {a[$3]=1}
-FILENAME==T && ($1 == "P") {if ($2 in a) {print $2}}
-' "$simplified" "$topo" | vpatchnames
+ awk -v N="$vno" '
+($1 == "I" || $1 == "d" ) && $2 == N {a[$3]=1}
+($1 == "P") {if ($2 in a) {print $2}}
+' "$topo" | vpatchnames
cleanup
fi
@@ -217,11 +215,10 @@
vpatchflow -
vno=$(awk -v V="$vpatch" '$1==V {print $2}' < "$vpatches")
- cat "$flow" "$topo" | vsimplify > "$simplified"
- awk -v S="$simplified" -v T="$topo" -v N="$vno" '
-FILENAME==S && ($1 == "I" || $1 == "d" ) && $3 == N {a[$2]=1}
-FILENAME==T && ($1 == "P") {if ($2 in a) {print $2}}
-' "$simplified" "$topo" | vpatchnames
+ awk -v N="$vno" '
+($1 == "I" || $1 == "d" ) && $3 == N {a[$2]=1}
+($1 == "P") {if ($2 in a) {print $2}}
+' "$topo" | vpatchnames
cleanup
fi
"antecedents" and "descendants" commands now read a single file, so I made awk filters less complex as well.
@@ -258,7 +255,7 @@
vpatchflow $*
vpatchflowerr
- awk '{print $2}' < "$topo" | vpatchnames | while read i; do
+ awk '$1 == "P" {print $2}' < "$topo" | vpatchnames | while read i; do
i=$(readlink -f $i)
cd "$dir"
vpatch <"$i" >/dev/null
@@ -267,4 +264,6 @@
cleanup
fi
"press" now needs to select the press order out of general output.
+echo "Unknown command: $1" + cleanup
One addition is also that any unknown command is reported.
The vpatch is available here:
curl 'http://bvt-trace.net/vpatches/vtools_vsh_utils_one_binary.vpatch' > vtools_vsh_utils_one_binary.vpatch
curl 'http://bvt-trace.net/vpatches/vtools_vsh_utils_one_binary.vpatch.bvt.sig' > vtools_vsh_utils_one_binary.vpatch.bvt.sig
Ada vfilter
This vpatch introduces the Ada version of vfilter program that was mentioned in the first article. It's awk version is presented below:
vfilter() {
awk -v N="$1" '
BEGIN {r=0;}
$0 ~ /^diff -uNr/ {r=1;}
r == 1 && $1 == "---" {sub("[^/]*/", "", $2); ip=$2; ih=$3}
r == 1 && $1 == "+++" {sub("[^/]*/", "", $2); print N,ip,$2,ih,$3; r=0;}'
}
To build the Ada version, I have repurposed code from vpatch.adb. All the common code was moved to vpatch_utils.adb, with vpatch_utils.ads looking like this:
with Bits; use Bits;
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
with Character_IO; use Character_IO;
with Ada.Strings.Fixed;
with Ada.Directories;
with Ada.Characters;
with Ada.Characters.Handling;
with Ada.Characters.Latin_1;
with Ada.Sequential_IO;
with SMG_Keccak; use SMG_Keccak;
with Temporary_File; use Temporary_File;
package Vpatch_Utils is
Hash_Length: constant Positive := 128;
type Hash_Type is (Empty, Value);
type Hash(The_Type: Hash_Type := Empty) is record
case The_Type is
when Value =>
Value: String(1..Hash_Length);
when Empty =>
null;
end case;
end record;
type Header (From_L, To_L: Natural) Is record
From_Hash: Hash;
From_File: String(1..From_L);
To_Hash: Hash;
To_File: String(1..To_L);
end record;
function Get_Header return Header;
function Starts_With(S: String; Prefix: String) return Boolean;
procedure Process_Hunks_For_Header(A_Header: Header);
function Path_Without_Prefix(Pathname: String;
Prefix: Positive) return String;
end Vpatch_Utils;
Then, vpatch.adb is changed to use the new module:
with Ada.Text_IO; use Ada.Text_IO;
with Vpatch_Utils; use Vpatch_Utils;
procedure VPatch is
begin
Read_Loop:
loop
exit Read_Loop when End_Of_File;
declare
S: String := Get_Line;
begin
if Starts_With(S, "diff ") then
declare
H: Header := Get_Header;
begin
Process_Hunks_For_Header(H);
end;
else
Put_Line("Prelude: " & S);
end if;
end;
end loop Read_Loop;
end;
And vfilter.ads looks similar, but has different printing code:
with Ada.Text_IO; use Ada.Text_IO;
with Vpatch_Utils; use Vpatch_Utils;
with Ada.Command_Line; use Ada.Command_Line;
procedure VFilter is
procedure Put(A_Hash: Hash) is
begin
case A_Hash.The_Type is
when Value =>
Put(A_Hash.Value);
when Empty =>
Put("false");
end case;
end;
procedure Put(A_Header: Header) is
begin
Put(Path_Without_Prefix(A_Header.From_File, 1) & " " &
Path_Without_Prefix(A_Header.To_File, 1) & " ");
Put(A_Header.From_Hash);
Put(" ");
Put(A_Header.To_Hash);
New_Line;
end;
Vpatch_No : String := Argument(1);
begin
Read_Loop:
loop
exit Read_Loop when End_Of_File;
declare
S: String := Get_Line;
begin
if Starts_With(S, "diff ") then
declare
H: Header := Get_Header;
begin
Put(Vpatch_No & " ");
Put(H);
end;
end if;
end;
end loop Read_Loop;
end;
Nothing fancy, as you can see, but as I mentioned, there is a performance benefit to this change. The quoted benchmark was done on rather slow system with gawk. On a system with Gales Linux and busybox awk, but faster CPU1, the performance difference is even bigger:
$ time vfilter 0 < patches/linux-genesis.vpatch >/dev/null
0m04.30s real 0m04.08s user 0m00.21s system
$ time awk -v N="0" '
> BEGIN {r=0;}
> $0 ~ /^diff -uNr/ {r=1;}
> r == 1 && $1 == "---" {sub("[^/]*/", "", $2); ip=$2; ih=$3}
> r == 1 && $1 == "+++" {sub("[^/]*/", "", $2); print N,ip,$2,ih,$3; r=0;}
> ' < patches/linux-genesis.vpatch >/dev/null
0m20.89s real 0m20.81s user 0m00.07s system
So whether this speedup is worth the extra vpatch, judge yourself, but I intend to grow future vpatches on this leaf.
The vpatch is available at the following link:
curl 'http://bvt-trace.net/vpatches/vtools_vsh_ada_vfilter.vpatch' > vtools_vsh_ada_vfilter.vpatch
curl 'http://bvt-trace.net/vpatches/vtools_vsh_ada_vfilter.vpatch.bvt.sig' > vtools_vsh_ada_vfilter.vpatch.bvt.sig
With these changes in place, I'm pretty much satisfied with where v.sh is, so next item on my TODO list will be port of keccak RNG to linux-2.6.32. I will also add some sort of autosyncing from the reference code shelf to v.sh, but this will come a bit later in the future.
- Disk does not matter as in both cases the file was cached in RAM. [↩]