Index: projects/clang370-import/bin/Makefile =================================================================== --- projects/clang370-import/bin/Makefile (revision 288925) +++ projects/clang370-import/bin/Makefile (revision 288926) @@ -1,65 +1,54 @@ # From: @(#)Makefile 8.1 (Berkeley) 5/31/93 # $FreeBSD$ .include SUBDIR= cat \ chflags \ chio \ chmod \ cp \ date \ dd \ df \ domainname \ echo \ ed \ expr \ freebsd-version \ getfacl \ hostname \ kenv \ kill \ ln \ ls \ mkdir \ mv \ pax \ pkill \ ps \ pwait \ pwd \ realpath \ rm \ rmdir \ setfacl \ sh \ sleep \ stty \ sync \ test \ uuidgen -.if ${MK_RCMDS} != "no" -SUBDIR+= rcp -.endif - -.if ${MK_SENDMAIL} != "no" -SUBDIR+= rmail -.endif - -.if ${MK_TCSH} != "no" -SUBDIR+= csh -.endif - -.if ${MK_TESTS} != "no" -SUBDIR+= tests -.endif +SUBDIR.${MK_RCMDS}+= rcp +SUBDIR.${MK_SENDMAIL}+= rmail +SUBDIR.${MK_TCSH}+= csh +SUBDIR.${MK_TESTS}+= tests .include SUBDIR:= ${SUBDIR:O} SUBDIR_PARALLEL= .include Index: projects/clang370-import/bin/ls/tests/ls_tests.sh =================================================================== --- projects/clang370-import/bin/ls/tests/ls_tests.sh (revision 288925) +++ projects/clang370-import/bin/ls/tests/ls_tests.sh (revision 288926) @@ -1,969 +1,974 @@ # # Copyright 2015 EMC Corp. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # $FreeBSD$ # create_test_dir() { [ -z "$ATF_TMPDIR" ] || return 0 export ATF_TMPDIR=$(pwd) # XXX: need to nest this because of how kyua creates $TMPDIR; otherwise # it will run into EPERM issues later TEST_INPUTS_DIR="${ATF_TMPDIR}/test/inputs" atf_check -e empty -s exit:0 mkdir -m 0777 -p $TEST_INPUTS_DIR cd $TEST_INPUTS_DIR } create_test_inputs() { create_test_dir atf_check -e empty -s exit:0 mkdir -m 0755 -p a/b/1 atf_check -e empty -s exit:0 ln -s a/b c atf_check -e empty -s exit:0 touch d atf_check -e empty -s exit:0 ln d e atf_check -e empty -s exit:0 touch .f atf_check -e empty -s exit:0 mkdir .g atf_check -e empty -s exit:0 mkfifo h atf_check -e ignore -s exit:0 dd if=/dev/zero of=i count=1000 bs=1 atf_check -e empty -s exit:0 touch klmn atf_check -e empty -s exit:0 touch opqr atf_check -e empty -s exit:0 touch stuv atf_check -e empty -s exit:0 install -m 0755 /dev/null wxyz atf_check -e empty -s exit:0 touch 0b00000001 atf_check -e empty -s exit:0 touch 0b00000010 atf_check -e empty -s exit:0 touch 0b00000011 atf_check -e empty -s exit:0 touch 0b00000100 atf_check -e empty -s exit:0 touch 0b00000101 atf_check -e empty -s exit:0 touch 0b00000110 atf_check -e empty -s exit:0 touch 0b00000111 
atf_check -e empty -s exit:0 touch 0b00001000 atf_check -e empty -s exit:0 touch 0b00001001 atf_check -e empty -s exit:0 touch 0b00001010 atf_check -e empty -s exit:0 touch 0b00001011 atf_check -e empty -s exit:0 touch 0b00001100 atf_check -e empty -s exit:0 touch 0b00001101 atf_check -e empty -s exit:0 touch 0b00001110 atf_check -e empty -s exit:0 touch 0b00001111 + + atf_check -e empty -s exit:0 sync } KB=1024 MB=$(( 1024 * $KB )) GB=$(( 1024 * $MB )) TB=$(( 1024 * $GB )) PB=$(( 1024 * $TB )) create_test_inputs2() { create_test_dir for filesize in 1 512 $(( 2 * $KB )) $(( 10 * $KB )) $(( 512 * $KB )); \ do atf_check -e ignore -o empty -s exit:0 \ dd if=/dev/zero of=${filesize}.file bs=1 \ count=1 oseek=${filesize} conv=sparse files="${files} ${filesize}.file" done for filesize in $MB $GB $TB; do atf_check -e ignore -o empty -s exit:0 \ dd if=/dev/zero of=${filesize}.file bs=$MB \ count=1 oseek=$(( $filesize / $MB )) conv=sparse files="${files} ${filesize}.file" done + + atf_check -e empty -s exit:0 sync } atf_test_case A_flag A_flag_head() { atf_set "descr" "Verify -A support with unprivileged users" } A_flag_body() { create_test_dir atf_check -e empty -o empty -s exit:0 ls -A create_test_inputs WITH_A=$PWD/../with_A.out WITHOUT_A=$PWD/../without_A.out atf_check -e empty -o save:$WITH_A -s exit:0 ls -A atf_check -e empty -o save:$WITHOUT_A -s exit:0 ls echo "-A usage" cat $WITH_A echo "No -A usage" cat $WITHOUT_A for dot_path in '\.f' '\.g'; do atf_check -e empty -o not-empty -s exit:0 grep "${dot_path}" \ $WITH_A atf_check -e empty -o empty -s not-exit:0 grep "${dot_path}" \ $WITHOUT_A done } atf_test_case A_flag_implied_when_root A_flag_implied_when_root_head() { atf_set "descr" "Verify that -A is implied for root" atf_set "require.user" "root" } A_flag_implied_when_root_body() { create_test_dir atf_check -e empty -o empty -s exit:0 ls -A create_test_inputs WITH_EXPLICIT=$PWD/../with_explicit_A.out WITH_IMPLIED=$PWD/../with_implied_A.out atf_check -e empty -o 
save:$WITH_EXPLICIT -s exit:0 ls -A atf_check -e empty -o save:$WITH_IMPLIED -s exit:0 ls echo "Explicit -A usage" cat $WITH_EXPLICIT echo "Implicit -A usage" cat $WITH_IMPLIED atf_check_equal "$(cat $WITH_EXPLICIT)" "$(cat $WITH_IMPLIED)" } atf_test_case B_flag B_flag_head() { atf_set "descr" "Verify that the output from ls -B prints out non-printable characters" } B_flag_body() { atf_skip "kyua report-jenkins doesn't properly escape non-printable chars: https://github.com/jmmv/kyua/issues/136" atf_check -e empty -o empty -s exit:0 touch "$(printf "y\013z")" atf_check -e empty -o match:'y\\013z' -s exit:0 ls -B } atf_test_case C_flag C_flag_head() { atf_set "descr" "Verify that the output from ls -C is multi-column, sorted down" } print_index() { local i=1 local wanted_index=$1; shift while [ $i -le $wanted_index ]; do if [ $i -eq $wanted_index ]; then echo $1 return fi shift : $(( i += 1 )) done } C_flag_body() { create_test_inputs WITH_C=$PWD/../with_C.out export COLUMNS=40 atf_check -e empty -o save:$WITH_C -s exit:0 ls -C echo "With -C usage" cat $WITH_C paths=$(find -s . -mindepth 1 -maxdepth 1 \! 
-name '.*' -exec basename {} \; ) set -- $paths num_paths=$# num_columns=2 max_num_paths_per_column=$(( $(( $num_paths + 1 )) / $num_columns )) local i=1 while [ $i -le $max_num_paths_per_column ]; do column_1=$(print_index $i $paths) column_2=$(print_index $(( $i + $max_num_paths_per_column )) $paths) #echo "paths[$(( $i + $max_num_paths_per_column ))] = $column_2" expected_expr="$column_1" if [ -n "$column_2" ]; then expected_expr="$expected_expr[[:space:]]+$column_2" fi atf_check -e ignore -o not-empty -s exit:0 \ egrep "$expected_expr" $WITH_C : $(( i += 1 )) done } atf_test_case D_flag D_flag_head() { atf_set "descr" "Verify that the output from ls -D modifies the time format used with ls -l" } D_flag_body() { atf_check -e empty -o empty -s exit:0 touch a.file atf_check -e empty -o match:"$(stat -f '%c[[:space:]]+%N' a.file)" \ -s exit:0 ls -lD '%s' } atf_test_case F_flag F_flag_head() { atf_set "descr" "Verify that the output from ls -F prints out appropriate symbols after files" } F_flag_body() { create_test_inputs atf_check -e empty -s exit:0 \ sh -c "pid=${ATF_TMPDIR}/nc.pid; daemon -p \$pid nc -lU j; sleep 2; pkill -F \$pid" atf_check -e empty -o match:'a/' -s exit:0 ls -F atf_check -e empty -o match:'c@' -s exit:0 ls -F atf_check -e empty -o match:'h\|' -s exit:0 ls -F atf_check -e empty -o match:'j=' -s exit:0 ls -F #atf_check -e empty -o match:'%' -s exit:0 ls -F atf_check -e empty -o match:'stuv' -s exit:0 ls -F atf_check -e empty -o match:'wxyz\*' -s exit:0 ls -F } atf_test_case H_flag H_flag_head() { atf_set "descr" "Verify that ls -H follows symlinks" } H_flag_body() { create_test_inputs atf_check -e empty -o match:'1' -s exit:0 ls -H c } atf_test_case I_flag I_flag_head() { atf_set "descr" "Verify that the output from ls -I is the same as ls for an unprivileged user" } I_flag_body() { create_test_inputs WITH_I=$PWD/../with_I.out WITHOUT_I=$PWD/../without_I.out atf_check -e empty -o save:$WITH_I -s exit:0 ls -I atf_check -e empty -o save:$WITHOUT_I 
-s exit:0 ls echo "Explicit -I usage" cat $WITH_I echo "No -I usage" cat $WITHOUT_I atf_check_equal "$(cat $WITH_I)" "$(cat $WITHOUT_I)" } atf_test_case I_flag_voids_implied_A_flag_when_root I_flag_voids_implied_A_flag_when_root_head() { atf_set "descr" "Verify that -I voids out implied -A for root" atf_set "require.user" "root" } I_flag_voids_implied_A_flag_when_root_body() { create_test_inputs atf_check -o not-match:'\.f' -s exit:0 ls -I atf_check -o not-match:'\.g' -s exit:0 ls -I atf_check -o match:'\.f' -s exit:0 ls -A -I atf_check -o match:'\.g' -s exit:0 ls -A -I } atf_test_case L_flag L_flag_head() { atf_set "descr" "Verify that -L prints out the symbolic link and conversely -P prints out the target for the symbolic link" } L_flag_body() { atf_check -e empty -o empty -s exit:0 ln -s target1/target2 link1 atf_check -e empty -o match:link1 -s exit:0 ls -L atf_check -e empty -o not-match:target1/target2 -s exit:0 ls -L } atf_test_case R_flag R_flag_head() { atf_set "descr" "Verify that the output from ls -R prints out the directory contents recursively" } R_flag_body() { create_test_inputs WITH_R=$PWD/../with_R.out WITH_R_expected_output=$PWD/../with_R_expected.out atf_check -e empty -o save:$WITH_R -s exit:0 ls -R set -- . $(find -s . \! -name '.*' -type d) while [ $# -gt 0 ]; do dir=$1; shift [ "$dir" != "." 
] && echo "$dir:" (cd $dir && ls -1A | sed -e '/^\./d') [ $# -ne 0 ] && echo done > $WITH_R_expected_output echo "-R usage" cat $WITH_R echo "-R expected output" cat $WITH_R_expected_output atf_check_equal "$(cat $WITH_R)" "$(cat $WITH_R_expected_output)" } atf_test_case S_flag S_flag_head() { atf_set "descr" "Verify that -S sorts by file size, then by filename lexicographically" } S_flag_body() { create_test_dir file_list_dir=$PWD/../files atf_check -e empty -o empty -s exit:0 mkdir -p $file_list_dir create_test_inputs create_test_inputs2 WITH_S=$PWD/../with_S.out WITHOUT_S=$PWD/../without_S.out atf_check -e empty -o save:$WITH_S ls -D '%s' -lS atf_check -e empty -o save:$WITHOUT_S ls -D '%s' -l WITH_S_parsed=$(awk '! /^total/ { print $7 }' $WITH_S) set -- $(awk '! /^total/ { print $5, $7 }' $WITHOUT_S) while [ $# -gt 0 ]; do size=$1; shift filename=$1; shift echo $filename >> $file_list_dir/${size} done file_lists=$(find $file_list_dir -type f -exec basename {} \; | sort -nr) WITHOUT_S_parsed=$(for file_list in $file_lists; do sort < $file_list_dir/$file_list; done) echo "-lS usage (parsed)" echo "$WITH_S_parsed" echo "-l usage (parsed)" echo "$WITHOUT_S_parsed" atf_check_equal "$WITHOUT_S_parsed" "$WITH_S_parsed" } atf_test_case T_flag T_flag_head() { atf_set "descr" "Verify -T support" } T_flag_body() { create_test_dir atf_check -e empty -o empty -s exit:0 touch a.file birthtime_in_secs=$(stat -f %B -t %s a.file) birthtime=$(date -j -f %s $birthtime_in_secs +"[[:space:]]+%b[[:space:]]+%e[[:space:]]+%H:%M:%S[[:space:]]+%Y") atf_check -e empty -o match:"$birthtime"'[[:space:]]+a\.file' \ -s exit:0 ls -lT a.file } atf_test_case a_flag a_flag_head() { atf_set "descr" "Verify -a support" } a_flag_body() { create_test_dir # Make sure "." and ".." show up with -a atf_check -e empty -o match:'\.[[:space:]]+\.\.' 
-s exit:0 ls -ax create_test_inputs WITH_a=$PWD/../with_a.out WITHOUT_a=$PWD/../without_a.out atf_check -e empty -o save:$WITH_a -s exit:0 ls -a atf_check -e empty -o save:$WITHOUT_a -s exit:0 ls echo "-a usage" cat $WITH_a echo "No -a usage" cat $WITHOUT_a for dot_path in '\.f' '\.g'; do atf_check -e empty -o not-empty -s exit:0 grep "${dot_path}" \ $WITH_a atf_check -e empty -o empty -s not-exit:0 grep "${dot_path}" \ $WITHOUT_a done } atf_test_case b_flag b_flag_head() { atf_set "descr" "Verify that the output from ls -b prints out non-printable characters" } b_flag_body() { atf_skip "kyua report-jenkins doesn't properly escape non-printable chars: https://github.com/jmmv/kyua/issues/136" atf_check -e empty -o empty -s exit:0 touch "$(printf "y\013z")" atf_check -e empty -o match:'y\\vz' -s exit:0 ls -b } atf_test_case d_flag d_flag_head() { atf_set "descr" "Verify that -d doesn't descend down directories" } d_flag_body() { create_test_dir output=$PWD/../output atf_check -e empty -o empty -s exit:0 mkdir -p a/b for path in . $PWD a; do atf_check -e empty -o save:$output -s exit:0 ls -d $path atf_check_equal "$(cat $output)" "$path" done } atf_test_case f_flag f_flag_head() { atf_set "descr" "Verify that -f prints out the contents of a directory unsorted" } f_flag_body() { create_test_inputs output=$PWD/../output # XXX: I don't have enough understanding of how the algorithm works yet # to determine more than the fact that all the entries printed out # exist paths=$(find -s . -mindepth 1 -maxdepth 1 \! 
-name '.*' -exec basename {} \; ) atf_check -e empty -o save:$output -s exit:0 ls -f for path in $paths; do atf_check -e ignore -o not-empty -s exit:0 \ egrep "^$path$" $output done } atf_test_case g_flag g_flag_head() { atf_set "descr" "Verify that -g does nothing (compatibility flag)" } g_flag_body() { create_test_inputs2 for file in $files; do atf_check -e empty -o match:"$(ls -a $file)" -s exit:0 \ ls -ag $file atf_check -e empty -o match:"$(ls -la $file)" -s exit:0 \ ls -alg $file done } atf_test_case h_flag h_flag_head() { atf_set "descr" "Verify that -h prints out the humanized units for file sizes with ls -l" atf_set "require.files" "/usr/bin/bc" } h_flag_body() { # XXX: this test doesn't currently show how 999 bytes will be 999B, # but 1000 bytes will be 1.0K, due to how humanize_number(3) works. create_test_inputs2 for file in $files; do file_size=$(stat -f '%z' "$file") || \ atf_fail "stat'ing $file failed" scale=2 if [ $file_size -lt $KB ]; then divisor=1 scale=0 suffix=B elif [ $file_size -lt $MB ]; then divisor=$KB suffix=K elif [ $file_size -lt $GB ]; then divisor=$MB suffix=M elif [ $file_size -lt $TB ]; then divisor=$GB suffix=G elif [ $file_size -lt $PB ]; then divisor=$TB suffix=T else divisor=$PB suffix=P fi bc_expr="$(printf "scale=%s\n%s/%s\nquit" $scale $file_size $divisor)" size_humanized=$(bc -e "$bc_expr" | tr '.' '\.' | sed -e 's,\.00,,') atf_check -e empty -o match:"$size_humanized.+$file" \ -s exit:0 ls -hl $file done } atf_test_case i_flag i_flag_head() { atf_set "descr" "Verify that -i prints out the inode for files" } i_flag_body() { create_test_inputs paths=$(find -L . 
-mindepth 1) [ -n "$paths" ] || atf_skip 'Could not find any paths to iterate over (!)' for path in $paths; do atf_check -e empty \ -o match:"$(stat -f '[[:space:]]*%i[[:space:]]+%N' $path)" \ -s exit:0 ls -d1i $path done } atf_test_case k_flag k_flag_head() { atf_set "descr" "Verify that -k prints out the size with a block size of 1kB" } k_flag_body() { create_test_inputs2 for file in $files; do atf_check -e empty \ -o match:"[[:space:]]+$(stat -f "%z" $file)[[:space:]]+.+[[:space:]]+$file" ls -lk $file done } atf_test_case l_flag l_flag_head() { atf_set "descr" "Verify that -l prints out the output in long format" } l_flag_body() { atf_check -e empty -o empty -s exit:0 touch a.file birthtime_in_secs=$(stat -f "%B" -t "%s" a.file) birthtime=$(date -j -f "%s" $birthtime_in_secs +"%b[[:space:]]+%e[[:space:]]+%H:%M") expected_output=$(stat -f "%Sp[[:space:]]+%l[[:space:]]+%Su[[:space:]]+%Sg[[:space:]]+%z[[:space:]]+$birthtime[[:space:]]+a\\.file" a.file) atf_check -e empty -o match:"$expected_output" -s exit:0 ls -l a.file } atf_test_case lcomma_flag lcomma_flag_head() { atf_set "descr" "Verify that -l, prints out the size with ',' delimiters" } lcomma_flag_body() { create_test_inputs atf_check \ -o match:'\-rw\-r\-\-r\-\-[[:space:]]+.+[[:space:]]+1,000[[:space:]]+.+i' \ env LC_ALL=en_US.ISO8859-1 ls -l, i } atf_test_case m_flag m_flag_head() { atf_set "descr" "Verify that the output from ls -m is comma-separated" } m_flag_body() { create_test_dir output=$PWD/../output atf_check -e empty -o empty -s exit:0 touch ,, "a,b " c d e atf_check -e empty -o save:$output -s exit:0 ls -m atf_check_equal "$(cat $output)" ",,, a,b , c, d, e" } atf_test_case n_flag n_flag_head() { atf_set "descr" "Verify that the output from ls -n prints out numeric GIDs/UIDs instead of symbolic GIDs/UIDs" atf_set "require.user" "root" } n_flag_body() { daemon_gid=$(id -g daemon) || atf_skip "could not resolve gid for daemon (!)" nobody_uid=$(id -u nobody) || atf_skip "could not resolve uid for 
nobody (!)" atf_check -e empty -o empty -s exit:0 touch a.file atf_check -e empty -o empty -s exit:0 chown $nobody_uid:$daemon_gid a.file atf_check -e empty \ -o match:'\-rw\-r\-\-r\-\-[[:space:]]+1[[:space:]]+'"$nobody_uid[[:space:]]+$daemon_gid"'[[:space:]]+.+a\.file' \ ls -ln a.file } atf_test_case o_flag o_flag_head() { atf_set "descr" "Verify that the output from ls -o prints out the chflag values or '-' if none are set" atf_set "require.user" "root" } o_flag_body() { local size=12345 create_test_dir atf_check -e ignore -o empty -s exit:0 dd if=/dev/zero of=a.file \ bs=$size count=1 atf_check -e ignore -o empty -s exit:0 dd if=/dev/zero of=b.file \ bs=$size count=1 atf_check -e empty -o empty -s exit:0 chflags uarch a.file atf_check -e empty -o match:"[[:space:]]+uarch[[:space:]]$size+.+a\\.file" \ -s exit:0 ls -lo a.file atf_check -e empty -o match:"[[:space:]]+\\-[[:space:]]$size+.+b\\.file" \ -s exit:0 ls -lo b.file } atf_test_case p_flag p_flag_head() { atf_set "descr" "Verify that the output from ls -p prints out '/' after directories" } p_flag_body() { create_test_inputs paths=$(find -L .) [ -n "$paths" ] || atf_skip 'Could not find any paths to iterate over (!)' for path in $paths; do suffix= # If path is not a symlink and is a directory, then the suffix # must be "/". if [ ! -L "${path}" -a -d "$path" ]; then suffix=/ fi atf_check -e empty -o match:"$path${suffix}" -s exit:0 \ ls -dp $path done } atf_test_case q_flag_and_w_flag q_flag_and_w_flag_head() { atf_set "descr" "Verify that the output from ls -q prints out '?' 
for ESC and ls -w prints out the escape character" } q_flag_and_w_flag_body() { atf_skip "kyua report-jenkins doesn't properly escape non-printable chars: https://github.com/jmmv/kyua/issues/136" create_test_dir test_file="$(printf "y\01z")" atf_check -e empty -o empty -s exit:0 touch "$test_file" atf_check -e empty -o match:'y\?z' -s exit:0 ls -q "$test_file" atf_check -e empty -o match:"$test_file" -s exit:0 ls -w "$test_file" } atf_test_case r_flag r_flag_head() { atf_set "descr" "Verify that the output from ls -r sorts the same way as reverse sorting with sort(1)" } r_flag_body() { create_test_inputs WITH_r=$PWD/../with_r.out WITH_sort=$PWD/../with_sort.out atf_check -e empty -o save:$WITH_r -s exit:0 ls -1r atf_check -e empty -o save:$WITH_sort -s exit:0 sh -c 'ls -1 | sort -r' echo "Sorted with -r" cat $WITH_r echo "Reverse sorted with sort(1)" cat $WITH_sort atf_check_equal "$(cat $WITH_r)" "$(cat $WITH_sort)" } atf_test_case s_flag s_flag_head() { atf_set "descr" "Verify that the output from ls -s matches the output from stat(1)" } s_flag_body() { create_test_inputs2 for file in $files; do atf_check -e empty \ -o match:"$(stat -f "%b" $file)[[:space:]]+$file" ls -s $file done } atf_test_case t_flag t_flag_head() { atf_set "descr" "Verify that the output from ls -t sorts by modification time" } t_flag_body() { create_test_dir atf_check -e empty -o empty -s exit:0 touch a.file atf_check -e empty -o empty -s exit:0 touch b.file - sync + atf_check -e empty -s exit:0 sync + atf_check -e empty -o match:'a\.file' -s exit:0 sh -c 'ls -lt | tail -n 1' atf_check -e empty -o match:'b\.file.*a\.file' -s exit:0 ls -Ct atf_check -e empty -o empty -s exit:0 rm a.file atf_check -e empty -o empty -s exit:0 sh -c 'echo "i am a" > a.file' - sync + atf_check -e empty -s exit:0 sync + atf_check -e empty -o match:'b\.file' -s exit:0 sh -c 'ls -lt | tail -n 1' atf_check -e empty -o match:'a\.file.*b\.file' -s exit:0 ls -Ct } atf_test_case u_flag u_flag_head() { atf_set "descr" 
"Verify that the output from ls -u sorts by last access" } u_flag_body() { create_test_dir atf_check -e empty -o empty -s exit:0 touch a.file - sync atf_check -e empty -o empty -s exit:0 touch b.file - sync + atf_check -e empty -s exit:0 sync atf_check -e empty -o match:'b\.file' -s exit:0 sh -c 'ls -lu | tail -n 1' atf_check -e empty -o match:'a\.file.*b\.file' -s exit:0 ls -Cu atf_check -e empty -o empty -s exit:0 sh -c 'echo "i am a" > a.file' - sync atf_check -e empty -o match:'i am a' -s exit:0 cat a.file - sync + atf_check -e empty -s exit:0 sync atf_check -e empty -o match:'b\.file' -s exit:0 sh -c 'ls -lu | tail -n 1' atf_check -e empty -o match:'a\.file.*b\.file' -s exit:0 ls -Cu } atf_test_case x_flag x_flag_head() { atf_set "descr" "Verify that the output from ls -x is multi-column, sorted across" } x_flag_body() { create_test_inputs WITH_x=$PWD/../with_x.out atf_check -e empty -o save:$WITH_x -s exit:0 ls -x echo "With -x usage" cat $WITH_x atf_check -e ignore -o not-empty -s exit:0 \ egrep "a[[:space:]]+c[[:space:]]+d[[:space:]]+e[[:space:]]+h" $WITH_x atf_check -e ignore -o not-empty -s exit:0 \ egrep "i[[:space:]]+klmn[[:space:]]+opqr[[:space:]]+stuv[[:space:]]+wxyz" $WITH_x } atf_test_case y_flag y_flag_head() { atf_set "descr" "Verify that the output from ls -y sorts the same way as sort(1)" } y_flag_body() { create_test_inputs WITH_sort=$PWD/../with_sort.out WITH_y=$PWD/../with_y.out atf_check -e empty -o save:$WITH_sort -s exit:0 sh -c 'ls -1 | sort' atf_check -e empty -o save:$WITH_y -s exit:0 ls -1y echo "Sorted with sort(1)" cat $WITH_sort echo "Sorted with -y" cat $WITH_y atf_check_equal "$(cat $WITH_sort)" "$(cat $WITH_y)" } atf_test_case 1_flag 1_flag_head() { atf_set "descr" "Verify that -1 prints out one item per line" } 1_flag_body() { create_test_inputs WITH_1=$PWD/../with_1.out WITHOUT_1=$PWD/../without_1.out atf_check -e empty -o save:$WITH_1 -s exit:0 ls -1 atf_check -e empty -o save:$WITHOUT_1 -s exit:0 \ sh -c 'for i in $(ls); do 
echo $i; done' echo "Explicit -1 usage" cat $WITH_1 echo "No -1 usage" cat $WITHOUT_1 atf_check_equal "$(cat $WITH_1)" "$(cat $WITHOUT_1)" } atf_init_test_cases() { + export BLOCKSIZE=512 atf_add_test_case A_flag atf_add_test_case A_flag_implied_when_root atf_add_test_case B_flag atf_add_test_case C_flag atf_add_test_case D_flag atf_add_test_case F_flag #atf_add_test_case G_flag atf_add_test_case H_flag atf_add_test_case I_flag atf_add_test_case I_flag_voids_implied_A_flag_when_root atf_add_test_case L_flag #atf_add_test_case P_flag atf_add_test_case R_flag atf_add_test_case S_flag atf_add_test_case T_flag #atf_add_test_case U_flag #atf_add_test_case W_flag #atf_add_test_case Z_flag atf_add_test_case a_flag atf_add_test_case b_flag #atf_add_test_case c_flag atf_add_test_case d_flag atf_add_test_case f_flag atf_add_test_case g_flag atf_add_test_case h_flag atf_add_test_case i_flag atf_add_test_case k_flag atf_add_test_case l_flag atf_add_test_case lcomma_flag atf_add_test_case m_flag atf_add_test_case n_flag atf_add_test_case o_flag atf_add_test_case p_flag atf_add_test_case q_flag_and_w_flag atf_add_test_case r_flag atf_add_test_case s_flag atf_add_test_case t_flag atf_add_test_case u_flag atf_add_test_case x_flag atf_add_test_case y_flag atf_add_test_case 1_flag } Index: projects/clang370-import/lib/libxo/Makefile =================================================================== --- projects/clang370-import/lib/libxo/Makefile (revision 288925) +++ projects/clang370-import/lib/libxo/Makefile (revision 288926) @@ -1,105 +1,105 @@ # $FreeBSD$ -LIBXO= ${.CURDIR:H:H}/contrib/libxo +LIBXOSRC= ${SRCTOP}/contrib/libxo -.PATH: ${LIBXO}/libxo +.PATH: ${LIBXOSRC}/libxo LIB= xo SHLIB_MAJOR=0 SHLIBDIR?= /lib SRCS= libxo.c xo_encoder.c xo_syslog.c -CFLAGS+=-I${LIBXO}/libxo +CFLAGS+=-I${LIBXOSRC}/libxo CFLAGS+=-DXO_ENCODERDIR=\"/usr/lib/libxo/encoder\" INCS= xo.h xo_encoder.h INCSDIR=${INCLUDEDIR}/libxo WARNS?= 5 MAN+= libxo.3 MAN+= xo_attr.3 \ xo_create.3 \ xo_emit.3 \ 
xo_emit_err.3 \ xo_err.3 \ xo_error.3 \ xo_finish.3 \ xo_flush.3 \ xo_message.3 \ xo_no_setlocale.3 \ xo_open_container.3 \ xo_open_list.3 \ xo_open_marker.3 \ xo_parse_args.3 \ xo_set_allocator.3 \ xo_set_flags.3 \ xo_set_info.3 \ xo_set_options.3 \ xo_set_style.3 \ xo_set_syslog_enterprise_id.3 \ xo_set_version.3 \ xo_set_writer.3 \ xo_syslog.3 MAN+= xo_format.5 MLINKS= xo_attr.3 xo_attr_h.3 \ xo_attr.3 xo_attr_hv.3 \ xo_create.3 xo_create_to_file.3 \ xo_create.3 xo_destroy.3 \ xo_emit.3 xo_emit_h.3 \ xo_emit.3 xo_emit_hv.3 \ xo_emit_err.3 xo_emit_errc.3 \ xo_emit_err.3 xo_emit_errx.3 \ xo_emit_err.3 xo_emit_warn.3 \ xo_emit_err.3 xo_emit_warnx.3 \ xo_emit_err.3 xo_emit_warn_c.3 \ xo_emit_err.3 xo_emit_warn_hc.3 \ xo_err.3 xo_errc.3 \ xo_err.3 xo_errx.3 \ xo_err.3 xo_warn.3 \ xo_err.3 xo_warnx.3 \ xo_err.3 xo_warn_c.3 \ xo_err.3 xo_warn_hc.3 \ xo_finish.3 xo_finish_h.3 \ xo_flush.3 xo_flush_h.3 \ xo_message.3 xo_message_c.3 \ xo_message.3 xo_message_hc.3 \ xo_message.3 xo_message_hcv.3 \ xo_open_container.3 xo_open_container_h.3 \ xo_open_container.3 xo_open_container_hd.3 \ xo_open_container.3 xo_open_container_d.3 \ xo_open_container.3 xo_close_container.3 \ xo_open_container.3 xo_close_container_h.3 \ xo_open_container.3 xo_close_container_hd.3 \ xo_open_container.3 xo_close_container_d.3 \ xo_open_list.3 xo_open_list_h.3 \ xo_open_list.3 xo_open_list_hd.3 \ xo_open_list.3 xo_open_list_d.3 \ xo_open_list.3 xo_open_instance.3 \ xo_open_list.3 xo_open_instance_h.3 \ xo_open_list.3 xo_open_instance_hd.3 \ xo_open_list.3 xo_open_instance_d.3 \ xo_open_list.3 xo_close_instance.3 \ xo_open_list.3 xo_close_instance_h.3 \ xo_open_list.3 xo_close_instance_hd.3 \ xo_open_list.3 xo_close_instance_d.3 \ xo_open_list.3 xo_close_list.3 \ xo_open_list.3 xo_close_list_h.3 \ xo_open_list.3 xo_close_list_hd.3 \ xo_open_list.3 xo_close_list_d.3 \ xo_open_marker.3 xo_open_marker_h.3 \ xo_open_marker.3 xo_close_marker.3 \ xo_open_marker.3 xo_close_marker_h.3 \ xo_parse_args.3 
xo_set_program.3 \ xo_set_flags.3 xo_clear_flags.3 \ xo_set_style.3 xo_set_style_name.3 \ xo_set_version.3 xo_set_version_h.3 \ xo_syslog.3 xo_close_log.3 \ xo_syslog.3 xo_open_log.3 \ xo_syslog.3 xo_set_logmask.3 \ xo_syslog.3 xo_vsyslog.3 .include Index: projects/clang370-import/release/arm/BANANAPI.conf =================================================================== --- projects/clang370-import/release/arm/BANANAPI.conf (revision 288925) +++ projects/clang370-import/release/arm/BANANAPI.conf (revision 288926) @@ -1,41 +1,41 @@ #!/bin/sh # # $FreeBSD$ # EMBEDDEDBUILD=1 EMBEDDED_TARGET="arm" EMBEDDED_TARGET_ARCH="armv6" EMBEDDEDPORTS="sysutils/u-boot-bananapi" KERNEL="A20" WORLD_FLAGS="${WORLD_FLAGS} UBLDR_LOADADDR=0x42000000" IMAGE_SIZE="1G" PART_SCHEME="MBR" FAT_SIZE="32m -b 1m" FAT_TYPE="16" MD_ARGS="-x 63 -y 255" NODOC=1 export BOARDNAME="BANANAPI" arm_install_uboot() { UBOOT_DIR="/usr/local/share/u-boot/u-boot-bananapi" - UBOOT_FILES="u-boot.img" + UBOOT_FILES="u-boot-sunxi-with-spl.bin" FATMOUNT="${DESTDIR%${KERNEL}}/fat" UFSMOUNT="${DESTDIR%${KERNEL}}/ufs" - chroot ${CHROOTDIR} dd if=${UBOOT_DIR}/u-boot-sunxi-with-spl.bin \ + chroot ${CHROOTDIR} dd if=${UBOOT_DIR}/${UBOOT_FILES} \ of=/dev/${mddev} bs=1k seek=8 conv=sync chroot ${CHROOTDIR} mkdir -p "${FATMOUNT}" "${UFSMOUNT}" chroot ${CHROOTDIR} mount_msdosfs /dev/${mddev}s1 ${FATMOUNT} chroot ${CHROOTDIR} mount /dev/${mddev}s2a ${UFSMOUNT} chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \ ${FATMOUNT}/ubldr.bin chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot sync umount_loop ${CHROOTDIR}/${FATMOUNT} umount_loop ${CHROOTDIR}/${UFSMOUNT} chroot ${CHROOTDIR} rmdir ${FATMOUNT} chroot ${CHROOTDIR} rmdir ${UFSMOUNT} return 0 } Index: projects/clang370-import/release/arm/CUBIEBOARD.conf =================================================================== --- projects/clang370-import/release/arm/CUBIEBOARD.conf (revision 288925) +++ 
projects/clang370-import/release/arm/CUBIEBOARD.conf (revision 288926) @@ -1,40 +1,40 @@ #!/bin/sh # # $FreeBSD$ # EMBEDDEDBUILD=1 EMBEDDED_TARGET="arm" EMBEDDED_TARGET_ARCH="armv6" EMBEDDEDPORTS="sysutils/u-boot-cubieboard" KERNEL="CUBIEBOARD" WORLD_FLAGS="${WORLD_FLAGS} UBLDR_LOADADDR=0x42000000" IMAGE_SIZE="1G" PART_SCHEME="MBR" FAT_SIZE="32m -b 1m" FAT_TYPE="16" MD_ARGS="-x 63 -y 255" NODOC=1 arm_install_uboot() { UBOOT_DIR="/usr/local/share/u-boot/u-boot-cubieboard" - UBOOT_FILES="u-boot.img" + UBOOT_FILES="u-boot-sunxi-with-spl.bin" FATMOUNT="${DESTDIR%${KERNEL}}/fat" UFSMOUNT="${DESTDIR%${KERNEL}}/ufs" - chroot ${CHROOTDIR} dd if=${UBOOT_DIR}/u-boot-sunxi-with-spl.bin \ + chroot ${CHROOTDIR} dd if=${UBOOT_DIR}/${UBOOT_FILES} \ of=/dev/${mddev} bs=1k seek=8 conv=sync chroot ${CHROOTDIR} mkdir -p "${FATMOUNT}" "${UFSMOUNT}" chroot ${CHROOTDIR} mount_msdosfs /dev/${mddev}s1 ${FATMOUNT} chroot ${CHROOTDIR} mount /dev/${mddev}s2a ${UFSMOUNT} chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \ ${FATMOUNT}/ubldr.bin chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot sync umount_loop ${CHROOTDIR}/${FATMOUNT} umount_loop ${CHROOTDIR}/${UFSMOUNT} chroot ${CHROOTDIR} rmdir ${FATMOUNT} chroot ${CHROOTDIR} rmdir ${UFSMOUNT} return 0 } Index: projects/clang370-import/release/arm/CUBIEBOARD2.conf =================================================================== --- projects/clang370-import/release/arm/CUBIEBOARD2.conf (revision 288925) +++ projects/clang370-import/release/arm/CUBIEBOARD2.conf (revision 288926) @@ -1,41 +1,41 @@ #!/bin/sh # # $FreeBSD$ # EMBEDDEDBUILD=1 EMBEDDED_TARGET="arm" EMBEDDED_TARGET_ARCH="armv6" EMBEDDEDPORTS="sysutils/u-boot-cubieboard2" KERNEL="A20" WORLD_FLAGS="${WORLD_FLAGS} UBLDR_LOADADDR=0x42000000" IMAGE_SIZE="1G" PART_SCHEME="MBR" FAT_SIZE="32m -b 1m" FAT_TYPE="16" MD_ARGS="-x 63 -y 255" NODOC=1 export BOARDNAME="CUBIEBOARD2" arm_install_uboot() { 
UBOOT_DIR="/usr/local/share/u-boot/u-boot-cubieboard2" - UBOOT_FILES="u-boot.img" + UBOOT_FILES="u-boot-sunxi-with-spl.bin" FATMOUNT="${DESTDIR%${KERNEL}}/fat" UFSMOUNT="${DESTDIR%${KERNEL}}/ufs" - chroot ${CHROOTDIR} dd if=${UBOOT_DIR}/u-boot-sunxi-with-spl.bin \ + chroot ${CHROOTDIR} dd if=${UBOOT_DIR}/${UBOOT_FILES} \ of=/dev/${mddev} bs=1k seek=8 conv=sync chroot ${CHROOTDIR} mkdir -p "${FATMOUNT}" "${UFSMOUNT}" chroot ${CHROOTDIR} mount_msdosfs /dev/${mddev}s1 ${FATMOUNT} chroot ${CHROOTDIR} mount /dev/${mddev}s2a ${UFSMOUNT} chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \ ${FATMOUNT}/ubldr.bin chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot sync umount_loop ${CHROOTDIR}/${FATMOUNT} umount_loop ${CHROOTDIR}/${UFSMOUNT} chroot ${CHROOTDIR} rmdir ${FATMOUNT} chroot ${CHROOTDIR} rmdir ${UFSMOUNT} return 0 } Index: projects/clang370-import/release/doc/en_US.ISO8859-1/relnotes/article.xml =================================================================== --- projects/clang370-import/release/doc/en_US.ISO8859-1/relnotes/article.xml (revision 288925) +++ projects/clang370-import/release/doc/en_US.ISO8859-1/relnotes/article.xml (revision 288926) @@ -1,1573 +1,1792 @@ %release; %sponsor; %vendor; ]>
&os; &release.current; Release Notes The &os; Project $FreeBSD$ 2015 The &os; Documentation Project &tm-attrib.freebsd; &tm-attrib.ibm; &tm-attrib.ieee; &tm-attrib.intel; &tm-attrib.sparc; &tm-attrib.general; The release notes for &os; &release.current; contain a summary of the changes made to the &os; base system on the &release.branch; development line. This document lists applicable security advisories that were issued since the last release, as well as significant changes to the &os; kernel and userland. Some brief remarks on upgrading are also presented. Introduction This document contains the release notes for &os; &release.current;. It describes recently added, changed, or deleted features of &os;. It also provides some notes on upgrading from previous versions of &os;. The &release.type; distribution to which these release notes apply represents the latest point along the &release.branch; development branch since &release.branch; was created. Information regarding pre-built, binary &release.type; distributions along this branch can be found at &release.url;. The &release.type; distribution to which these release notes apply represents a point along the &release.branch; development branch between &release.prev; and the future &release.next;. Information regarding pre-built, binary &release.type; distributions along this branch can be found at &release.url;. This distribution of &os; &release.current; is a &release.type; distribution. It can be found at &release.url; or any of its mirrors. More information on obtaining this (or other) &release.type; distributions of &os; can be found in the Obtaining &os; appendix to the &os; Handbook. All users are encouraged to consult the release errata before installing &os;. The errata document is updated with late-breaking information discovered late in the release cycle or after the release. Typically, it contains information on known bugs, security advisories, and corrections to documentation. 
An up-to-date copy of the errata for &os; &release.current; can be found on the &os; Web site. This document describes the most user-visible new or changed features in &os; since &release.prev;. In general, changes described here are unique to the &release.branch; branch unless specifically marked as &merged; features. Typical release note items document recent security advisories issued after &release.prev;, new drivers or hardware support, new commands or options, major bug fixes, or contributed software upgrades. They may also list changes to major ports/packages or release engineering practices. Clearly the release notes cannot list every single change made to &os; between releases; this document focuses primarily on security advisories, user-visible changes, and major architectural improvements. Upgrading from Previous Releases of &os; Binary upgrades between RELEASE versions (and snapshots of the various security branches) are supported using the &man.freebsd-update.8; utility. The binary upgrade procedure will update unmodified userland utilities, as well as unmodified GENERIC kernels distributed as a part of an official &os; release. The &man.freebsd-update.8; utility requires that the host being upgraded have Internet connectivity. Source-based upgrades (those based on recompiling the &os; base system from source code) from previous versions are supported, according to the instructions in /usr/src/UPDATING. Upgrading &os; should only be attempted after backing up all data and configuration files. Security and Errata This section lists the various Security Advisories and Errata Notices since &release.prev;. Security Advisories &security; Errata Notices &errata; Userland This section covers changes and additions to userland applications, contributed software, and system utilities. 
Userland Configuration Changes The default &man.newsyslog.conf.5; now includes files in the /etc/newsyslog.conf.d/ and /usr/local/etc/newsyslog.conf.d/ directories by default for &man.newsyslog.8;. The &man.mailwrapper.8; utility has been updated to use &man.mailer.conf.5; from the LOCALBASE environment variable, which defaults to /usr/local if unset. The MK_ARM_EABI &man.src.conf.5; option has been removed. + + The ntp suite + has been updated to version 4.2.8p3. Userland Application Changes The &man.casperd.8; daemon has been added, which provides access to functionality that is not available in the capability mode sandbox. When unable to load a kernel module with &man.kldload.8;, a message instructing the user to view the output of &man.dmesg.8; is now printed, as opposed to the previous output Exec format error. The &man.pciconf.8; utility has been updated to allow PCI devices that are attached to a driver to be identified by their device name instead of just the selector. Additionally, an optional device argument to the -l flag has been added to restrict the output to only listing details about a single device. A new flag, onifconsole has been added to /etc/ttys. This allows the system to provide a login prompt via serial console if the device is an active kernel console, otherwise it is equivalent to off. Support for displaying VPD for PCI devices via &man.pciconf.8; has been added. &man.ping.8; protects against malicious network packets using the Capsicum framework to drop privileges. The &man.ps.1; utility has been updated to include the -J flag, used to filter output by matching &man.jail.8; IDs and names. Additionally, argument 0 can be used to -J to only list processes running on the host system. The &man.top.1; utility has been updated to filter by &man.jail.8; ID or name, in followup to the &man.ps.1; change in r265229. The &man.pmcstat.8; utility has been updated to include a new flag, -l, which ends event collection after the specified number of seconds.
The &man.ps.1; utility has been updated to include a new keyword, tracer, which displays the PID of the tracing process. Support for adding empty partitions has been added to the &man.mkimg.1; utility. The &man.primes.6; utility has been updated to correctly enumerate prime numbers between 4295098369 and 3825123056546413050. Prior to this change, it was possible for values returned in this range to be incorrectly identified as prime numbers. The &man.mkimg.1; utility has been updated to include three options used to print information about &man.mkimg.1; itself: Option Output --version The current version of the &man.mkimg.1; utility --formats The disk image file formats supported by &man.mkimg.1; --schemes The partition schemes supported by &man.mkimg.1; Userland &man.ctf.5; support in &man.dtrace.1; has been added. With this change, &man.dtrace.1; is able to resolve type info for function and USDT probe arguments, and function return values. The &man.elfdump.1; utility has been updated to support capability mode provided by &man.capsicum.4;. The &man.fstyp.8; utility has been added, which is used to determine the filesystem on a specified device. The libedit library has been updated to support UTF-8, which additionally provides unicode support to &man.sh.1;. The &man.mkimg.1; utility has been updated to support the MBR EFI partition type. The &man.ptrace.2; system call has been updated to include support for Altivec registers on &os;/&arch.powerpc;. A new device control utility, &man.devctl.8; has been added, which allows making administrative changes to individual devices, such as attaching and detaching drivers, and enabling and disabling devices. The &man.devctl.8; utility uses the new &man.devctl.3; library. The &man.netstat.1; utility has been updated to link against the &man.libxo.3; shared library. A new flag, -c, has been added to the &man.mkimg.1; utility, which allows specifying the capacity of the target disk image.
The &man.uefisign.8; utility has been added. The &man.freebsd-update.8; utility has been updated to prevent fetching updated binary patches when a previous upgrade has not been thoroughly completed. A regression in the &man.libarchive.3; library that would prevent a directory from being included in the archive when --one-file-system is used has been fixed. The &man.ar.1; utility has been updated to set ARCHIVE_EXTRACT_SECURE_SYMLINKS and ARCHIVE_EXTRACT_SECURE_NODOTDOT to disallow directory traversal when extracting an archive, similar to &man.tar.1;. A race condition in &man.wc.1; that would cause final results to be sent to &man.stderr.4; when receiving the SIGINFO signal has been fixed. The &man.chflags.1;, &man.chgrp.1;, &man.chmod.1;, and &man.chown.8; utilities now affect symbolic links when the -R flag is specified, as documented in &man.symlink.7;. The &man.date.1; utility has been updated to print the modification time of the file passed as an argument to the -r flag, improving compatibility with the GNU &man.date.1; utility behavior. The &man.pw.8; utility has been updated with a new flag, -R, that sets the root directory within which the utility will operate. The &man.lockstat.1; utility has been updated with several improvements: Spin locks are now reported as the amount of time spinning, instead of loop iterations. Reader locks are now recognized as adaptive that can spin on &os;. Lock acquisition events for successful reader try-lock events are now reported. Spin and block events are now reported before lock acquisition events. The &man.fstyp.8; utility has been updated to be able to detect &man.zfs.8; and &man.geli.8; filesystems. + The &man.mkimg.1; utility has been + updated to include support for NTFS + filesystems in both MBR and + GPT partitioning schemes. + + The &man.quota.1; utility has been + updated to include support for IPv6.
+ + The &man.jail.8; utility has been + updated to include a new flag, -l, which + ensures a clean environment in the target jail when used. + Additionally, &man.jail.8; will run a shell within the target + jail when no commands are specified. + The &man.w.1; utility has been updated to display the full IPv6 remote address of the host from which a user is connected. + + The &man.jail.8; framework has been + updated to allow mounting &man.linprocfs.5; and + &man.linsysfs.5; within a jail. + + The &man.patch.1; utility has been + updated to include a new option to the -V + flag, none, which disables backup file + creation when applying a patch. + + The + &man.ar.1; utility now enables deterministic mode + (-D) by default. This behavior can be + disabled by specifying the -U flag. + + The &man.xargs.1; utility has been + updated to allow specifying 0 as an + argument to the -P (parallel mode) flag, + which allows creating as many concurrent processes as + possible. + + The &man.patch.1; utility has been + updated to remove the automatic checkout feature. + + A + new utility, &man.sesutil.8;, has been added, which is used + to manage &man.ses.4; devices. + + The &man.pciconf.8; utility has been + updated to use the PCI ID database from the misc/pciids package, if present, + falling back to the PCI ID database in the &os; base + system. + + The &man.ifconfig.8; utility has been + updated to always exit with an error code if an important + &man.ioctl.2; fails. Contributed Software &man.lldb.1; has been updated to upstream snapshot version r196259. &man.byacc.1; has been updated to version 20140101. - &man.jemalloc.3; has been updated to - version 3.5.0. - libc++ has been updated to version 3.4. OpenSSH has been updated to 6.5p1. mdocml has been updated to version 1.12.3. LLVM and Clang have been updated to version 3.4. The binutils suite of utilities has been updated to include upstream patches that add new relocations for &arch.powerpc; support.
The ELF Tool Chain has been updated to upstream revision r3136. The texinfo utility and info pages were removed from the base system. The print/texinfo port should be installed on systems where info pages are needed. The ELF object manipulation tools addr2line, elfcopy (strip), nm, readelf, size, and strings were switched to the versions from the ELF Tool Chain project. The libedit library has been updated to include UTF-8 support, adding UTF-8 support to the &man.sh.1; shell. The &man.xz.1; utility has been updated to support multi-threaded compression. The elftoolchain utilities have been updated to version 3179. The &man.xz.1; utility has been updated to version 5.2.1. The &man.nvi.1; utility has been updated to version 2.1.3. The &man.wpa.supplicant.8; and &man.hostapd.8; utilities have been updated to version 2.4. - The &man.unbound.8; utility has been - updated to version 1.5.3. - The &man.resolvconf.8; utility has been updated to version 3.7.0. - The &man.nc.1; utility has been updated - to the OpenBSD 5.7 version. - - The &man.acpi.4; subsystem has been - updated to version 20150515. - - The &man.file.1; utility has been - updated to version 5.23. - bmake has been updated to version 20150606. - Timezone data files have been updated to - version 2015e. - sendmail has been updated to 8.15.2. Starting with &os; 11.0 and sendmail 8.15, sendmail uses uncompressed IPv6 addresses by default, i.e., they will not contain ::. For example, instead of ::1, it will be 0:0:0:0:0:0:0:1. This permits a zero subnet to have a more specific match, such as different map entries for IPv6:0:0 versus IPv6:0. This change requires that configuration data (including maps, files, classes, custom ruleset, etc.) must use the same format, so make certain such configuration data is upgraded. As a very simple check, search for patterns like 'IPv6:[0-9a-fA-F:]*::' and 'IPv6::'.
To return to the old behavior, set the m4 option confUSE_COMPRESSED_IPV6_ADDRESSES or the cf option UseCompressedIPv6Addresses. + The &man.tcpdump.1; utility has been + updated to version 4.7.4. + OpenSSL has been updated to version 1.0.1p. The &man.ssh.1; utility has been updated to re-implement hostname canonicalization before locating the host in known_hosts. + + The &man.libarchive.3; library has been + updated to properly skip a sparse file entry in a &man.tar.1; + file, which would previously produce errors. + + The apr + library used by &man.svnlite.1; has been updated to version + 1.5.2. + + The serf + library used by &man.svnlite.1; has been updated to version + 1.3.8. + + The &man.svnlite.1; utility has been + updated to version 1.8.14. + + The sqlite3 + library used by &man.svnlite.1; and &man.kerberos.8; has been + updated to version 3.8.11.1. + + Timezone data files have been updated to + version 2015f. + + The &man.acpi.4; subsystem has been + updated to version 20150818. + + The &man.unbound.8; utility has been + updated to version 1.5.4. + + &man.jemalloc.3; has been updated to + version 4.0.2. + + The &man.file.1; utility has been + updated to version 5.25. + + The &man.nc.1; utility has been updated + to the OpenBSD 5.8 version. Installation and Configuration Tools The &man.bsdinstall.8; partition editor and &man.sade.8; utility have been updated to include native ZFS support. The &os; installation utility, &man.bsdinstall.8;, has been updated to set the canmount &man.zfs.8; property to off for the /var dataset, preventing the contents of directories within /var from conflicting when using multiple boot environments, such as that provided by sysutils/beadm. The &man.bsdconfig.8; utility has been updated to skip the initial &man.tzsetup.8; UTC versus wall-clock time prompt when run in a virtual machine, determined when the kern.vm_guest &man.sysctl.8; is set to 1. 
The &man.bsdinstall.8; utility has been updated to use the new &man.dpv.3; library to display progress when extracting the &os; distributions. Support for detecting and implementing aligning partitions on 1Mb boundaries has been added to &man.bsdinstall.8;. Support for detecting and implementing a workaround for various laptops and motherboards that do not boot properly from GPT-partitioned disks has been added to &man.bsdinstall.8;. Additionally, the active flag will be set on the partition when needed. Support for selecting the partitioning scheme when installing on the UFS filesystem has been added to &man.bsdinstall.8;. <filename class="directory">/etc/rc.d</filename> Scripts The &man.rc.8; subsystem has been updated to allow configuring services in ${LOCALBASE}/etc/rc.conf.d/. If LOCALBASE is unset, it defaults to /usr/local. A new &man.rc.8; script, growfs, has been added, which will resize the root filesystem on boot if /firstboot exists. The mrouted &man.rc.8; script has been removed from the base system. An equivalent script is available from the net/mrouted port. A new &man.rc.8; script, iovctl, has been added, which allows automatically starting the &man.iovctl.8; utility at boot. + + The &man.service.8; utility has been + updated to honor entries within /etc/rc.conf.d/. + <filename class="directory">/etc/periodic</filename> Scripts The daily &man.periodic.8; script 110.clean-tmps has been updated to avoid crossing filesystem mount boundaries when cleaning files in /tmp. A new &man.periodic.8; script, 510.status-world-kernel, has been added, which evaluates the running userland and kernel versions from the &man.uname.1; -U and -K arguments, and prints an error if the system userland and kernel are not in sync. Runtime Libraries and API The Blowfish &man.crypt.3; default format has been changed to $2b$. 
The &man.readline.3; library is now statically linked in software within the base system, and the shared library is no longer installed, allowing the Ports Collection to use a modern version of the library. The &man.strptime.3; library has been updated to add support for POSIX-2001 features %U and %W. The &man.dl.iterate.phdr.3; library has been changed to always return the path name of the ELF object in the dlpi_name structure member. The &man.libxo.3; library has been imported to the base system. A userland library for Chelsio Terminator 5 based iWARP cards has been added, allowing userland RDMA applications to work over compatible NICs. The &man.gpio.3; library has been added, providing a wrapper around the &man.gpio.4; kernel interface. The &man.procctl.2; system call has been updated to include a facility for non-&man.init.8; processes to be declared as the reaper of child processes and their descendants. The futimens() and utimensat() system calls have been added. See &man.utimensat.2; for more information. The &man.elf.3; compile-time dependency has been removed from dtri.o, which allows adding DTrace probes to userland applications and libraries without also linking against &man.elf.3;. The &man.setmode.3; function has been updated to consistently set errno on failure. The &man.qsort.3; functions have been updated to be able to handle 32-bit aligned data on 64-bit platforms, also providing a significant improvement in 32-bit workloads. Several standard include headers have been updated to make use of gcc attributes, such as __result_use_check(), __alloc_size(), and __nonnull(). Support for file verification in MAC has been added. The libgomp library is now only built when building GCC from the base system. An up-to-date version is available in the Ports Collection as devel/libiomp5-devel. The stdlib.h and malloc.h headers have been updated to make use of the gcc alloc_align() attribute. The Blowfish &man.crypt.3; library has been updated to support $2y$ hashes.
+ + The &man.execl.3; and &man.execlp.3; + library functions have been updated to use the + __sentinel gcc + attribute. ABI Compatibility The &linux; compatibility version has been updated to 2.6.18. The compat.linux.osrelease &man.sysctl.8; is evaluated when building the emulators/linux-c6 and related ports. Kernel This section covers changes to kernel configurations, system tuning, and system control parameters that are not otherwise categorized. Kernel Bug Fixes A kernel bug that inhibited proper functionality of the dev.cpu.0.freq &man.sysctl.8; on &intel; processors with Turbo Boost ™ enabled has been fixed. Support for &man.dtrace.1; stack tracing has been fixed for &os;/&arch.powerpc;, using the trapexit() and asttrapexit() functions instead of checking within addressed kernel space. A kernel panic triggered when destroying a &man.vnet.9; &man.jail.8; configured with &man.gif.4; has been fixed. A kernel panic triggered when destroying a &man.vnet.9; &man.jail.8; configured with &man.gre.4; has been fixed. A bug in &man.ipfw.4; that could potentially lead to a kernel panic when using &man.dummynet.4; at layer 2 has been fixed. The kernel RPC has been updated to include several enhancements: The 45 MiB limit on requests queued for &man.nfsd.8; threads has been removed. Avoids unnecessary throttling by not deferring accounting for completed requests. Fixes an integer overflow and signedness bugs. Support for &man.dtrace.1; has been added for the Book-E ™. + + The &man.kqueue.2; system call has been + updated to handle write events to files larger than 2 + gigabytes. Kernel Configuration The IMAGACT_BINMISC kernel configuration option has been enabled by default, which enables application execution through emulators, such as Qemu. The VT kernel configuration file has been removed, and the &man.vt.4; driver is included in the GENERIC kernel. 
To enable &man.vt.4;, enter set kern.vty=vt at the &man.loader.8; prompt during boot, or add kern.vty=vt to &man.loader.conf.5; and reboot the system. The &man.config.8; utility has been updated to allow using a non-standard src/ tree, specified as an argument to the -s flag. The &os;/&arch.powerpc64; kernel now builds as a position-independent executable, allowing the kernel to be loaded into and run from any physical or virtual address. This change requires an update to &man.loader.8;. The userland and kernel must be updated before rebooting the system. A new module for creating rpi.dtb has been added for the Raspberry Pi. The rpi.dtb module is now installed to /boot/dtb/ by default for the Raspberry Pi system. Kernel support for Vector-Scalar eXtension (VSX) found on POWER7 and POWER8 hardware has been added. The &man.pmap.9; implementation for 64-bit &powerpc; processors has been overhauled to improve concurrency. A new module for creating the dtb module for AM335x systems has been added. The PAE_TABLES kernel configuration option has been added for &os;/&arch.i386;, which instructs &man.pmap.9; to use PAE format for page tables while maintaining a 32-bit physical address size elsewhere in the kernel. The use of this option can enhance application-level security by enabling the creation of no execute mappings on modern &arch.i386; processors. Unlike the PAE option, PAE_TABLES preserves kernel binary interface (KBI) compatibility with non-PAE kernels, allowing non-PAE kernel modules and drivers to work with a PAE_TABLES-enabled kernel. Additionally, system limits are tuned for 4GB maximum RAM, avoiding kernel virtual address space (KVA) exhaustion. The SIFTR kernel configuration has been added, allowing building &man.siftr.4; statically into the kernel. The &arch.arm; boot loader, ubldr, is now relocatable.
In addition, ubldr.bin is now created during build time, which is a stripped binary with an entry point of 0, providing the ability to specify the load address by running go ${loadaddr} in u-boot. The &man.nvd.4; and &man.nvme.4; drivers are now included in the GENERIC kernel configuration by default. A new kernel configuration option, EM_MULTIQUEUE, has been added which enables multi-queue support in the &man.em.4; driver. Multi-queue support in the &man.em.4; driver is not officially supported by &intel;. + + The GENERIC kernel + configuration has been updated to include the + IPSEC option by default. + + Initial NUMA + affinity and policy configuration has been added. See + &man.numactl.1;, and &man.numa.getaffinity.2;, for usage + details. + + The &man.pms.4; driver has been added + to the GENERIC kernel configuration for + supported architectures. + + The + CUBIEBOARD2 kernel configuration has been + renamed to A20. + + Kernel + debugging symbols are now installed to /usr/lib/debug/boot/kernel/. + To retain the previous behavior, add + KERN_DEBUGDIR="" to + &man.src.conf.5;. System Tuning and Controls The &man.hwpmc.4; default and maximum callchain depths have been increased. The default has been increased from 16 to 32, and the maximum increased from 32 to 128. The kern.osrelease and kern.osreldate are now configurable &man.jail.8; parameters. The &man.devfs.5; device filesystem has been changed to update timestamps for read/write operations using seconds precision. A new &man.sysctl.8;, vfs.devfs.dotimes has been added, which when set to a non-zero value, enables default precision timestamps for these operations. A new &man.sysctl.8;, kern.racct.enable, has been added, which when set to a non-zero value allows using &man.rctl.8; with the GENERIC kernel. A new kernel configuration option, RACCT_DISABLED has also been added. The GENERIC kernel configuration now includes RACCT and RCTL by default. 
To enable RACCT and RCTL on a system using the GENERIC kernel configuration, add kern.racct.enable=1 to &man.loader.conf.5;, and reboot the system. A new &man.sysctl.8;, net.inet.tcp.hostcache.purgenow, has been added, which when set to 1 during runtime will flush all net.inet.tcp.hostcache entries. + + A new &man.sysctl.8;, + hw.model, has been added, which displays + CPU model information. + + The &man.uart.4; driver has been + updated to allow tuning packets per second captured during + runtime. Devices and Drivers This section covers changes and additions to devices and device drivers since &release.prev;. Device Drivers Support for GPS ports has been added to &man.uhso.4;. The &man.full.4; device has been added, and the lindev(4) device has been removed. Prior to this change, lindev(4) provided only the /dev/full character device, returning ENOSPC on write attempts. As this device is not specific to &linux;, a native &os; version has been added. Hardware context support has been added to the drm/i915 driver, adding support for Mesa 9.2 and later. The &man.vt.4; driver has been updated, replacing the bitmapped kern.vt.spclkeys &man.sysctl.8; with individual kern.vt.kbd_* variants. The &man.hpet.4; driver has been updated to create a /dev/hpetN device, providing access to HPET from userspace. The drm code has been updated to match &linux; version 3.8.13. The &man.psm.4; driver has been updated to include improved support for newer Synaptics ® touchpads and the ClickPad ® mouse on newer Lenovo ™ laptops. Support for the Freescale PCI Root Complex device has been added. + + The &man.cyapa.4; driver has been added, + supporting the Cypress APA I2C trackpad. + + The &man.isl.4; driver has been added, + supporting the Intersil I2C ISL29018 digital ambient light + sensor. Storage Drivers The &man.mpr.4; device has been added, providing support for LSI Fusion-MPT 3 12Gb SCSI/SATA controllers. 
The &man.mrsas.4; driver has been added, providing support for LSI MegaRAID SAS controllers. The &man.mfi.4; driver will attach to the controller, by default. To enable &man.mrsas.4; add hw.mfi.mrsas_enable=1 to /boot/loader.conf, which turns off &man.mfi.4; device probing. At this time, the &man.mfiutil.8; utility and the &os; version of MegaCLI and StorCli do not work with &man.mrsas.4;. The &man.ctl.4; subsystem has been updated, increasing the ports limit from 128 to 256, and LUN limit from 256 to 1024. The asr(4) driver has been removed, and is no longer supported. The &man.hptnr.4; driver has been updated to version 1.1.1. + + The &man.pms.4; driver has been added, + providing support for the PMC Sierra line of + SAS/SATA host bus + adapters. + + The &man.ioat.4; driver has been added, + providing support for the PSE (Platform + Storage Extension). + + The + CTL High Availability implementation has + been rewritten. + + The &man.ctl.4; driver has been updated + to support CD-ROM and removable devices. Network Drivers Support for Broadcom chipsets BCM57764, BCM57767, BCM57782, BCM57786 and BCM57787 has been added to &man.bge.4;. Support for the &intel; Centrino™ Wireless-N 135 chipset has been added. Firmware for &intel; Centrino™ Wireless-N 105 devices has been added to the base system. The deprecated nve(4) driver has been removed. Users of NVIDIA nForce MCP network adapters are advised to use the &man.nfe.4; driver instead, which has been the default driver for this hardware since &os; 7.0. The if_nf10bmac(4) device has been added, providing support for NetFPGA-10G Embedded CPU Ethernet Core. The if_nf10bmac(4) driver operates on the FPGA, and is not suited for the PCI host interface. The &man.ath.hal.4; driver has been updated to support the Atheros AR1111 chipset. Support for the &intel; Centrino™ Wireless-N 105 chipset has been added. Support for the &man.cxgbe.4; Terminator 5 (T5) 10G/40G cards has been added to &man.netmap.4;. 
The &man.alc.4; driver has been updated to support AR816x and AR817x ethernet controllers. - The &man.pf.4; packet filter default hash - has been changed from Jenkins to + The &man.pf.4; packet filter default + hash has been changed from Jenkins to Murmur3, providing a 3-percent performance increase in packets-per-second. The &man.vxlan.4; driver has been added, which creates a virtual Layer 2 (Ethernet) network overlaid in a Layer 3 (IP/UDP) network. The &man.vxlan.4; driver is analogous to &man.vlan.4;, but is designed to be better suited for large, multiple-tenant datacenter environments. The &man.gre.4; driver has been significantly overhauled, and has been split into two separate modules, &man.gre.4; and &man.me.4;. The &man.ral.4; driver has been updated to support the RT5390 and RT5392 chipsets. The &man.sfxge.4; driver has been updated to support Solarflare Flareon Ultra 7000-series chipsets. The &man.em.4; driver has been updated with improved transmission queue hang detection. The &man.cdce.4; driver has been updated to include support for the RTL8153 chipset. + + The &man.iwm.4; driver has been imported + from OpenBSD, providing support for &intel; 3160/7260/7265 + wireless chipsets. + + The &man.em.4; driver has been updated + to allow disabling CRC stripping. + + The &man.pf.4; implementation has been + updated to remove support for the scrub fragment + crop|drop-ovl filtering rule. Systems with this + rule in &man.pf.conf.5; will implicitly be converted to the + scrub fragment reassemble filtering rule, + without necessary intervention. + + The &man.em.4; driver has been updated + to support the Skylake I219 chipset. Hardware Support This section covers general hardware support for physical machines, hypervisors, and virtualization environments, as well as hardware changes and updates that do not otherwise fit in other sections of this document. Hardware Support The &man.asmc.4; driver has been updated to support the &apple; MacMini 3,1. 
Support for &os;/ia64 has been dropped as of &os; 11. An issue that could cause a system to hang when entering ACPI S3 state (suspend to RAM) has been corrected in the &man.acpi.4; and &man.pci.4; drivers. The power management unit subsystem has been updated to support power button events on certain &arch.powerpc; hardware, such as aluminum PowerBook ®. The &man.hwpmc.4; driver has been updated to correct performance counter sampling on G4 (MPC74xxx) and G5 class processors. The OpenCrypto framework has been updated to include AES-ICM and AES-GCM modes, both of which have also been added to the &man.aesni.4; driver. The &man.hwpmc.4; driver has been updated to support the Freescale e500 core. The &man.ig4.4; driver has been added, providing support for the fourth generation &intel; I2C SMBus. The &man.uart.4; driver has been updated to support AMT devices on newer systems. + + Initial SMP support has been + added to the &os;/&arch.arm64; port. Virtualization Support Support for the Virtual Interrupt Delivery feature of &intel; VT-x is enabled if supported by the CPU. This feature can be disabled by running sysctl hw.vmm.vmx.use_apic_vid=0. Additionally, to persist this setting across reboots, add hw.vmm.vmx.use_apic_vid=0 to /etc/sysctl.conf. Support for Posted Interrupt Processing is enabled if supported by the CPU. This feature can be disabled by running sysctl hw.vmm.vmx.use_apic_pir=0. Additionally, to persist this setting across reboots, add hw.vmm.vmx.use_apic_pir=0 to /etc/sysctl.conf. Unmapped IO support has been added to &man.virtio_blk.4;. Unmapped IO support has been added to &man.virtio_scsi.4;. The &man.virtio_random.4; driver has been added to harvest entropy from the host system. &os;/&arch.i386; guests can be run under bhyve. Support for running a &os;/&arch.amd64; Xen guest instance as PVH guest has been added. 
PVH mode, short for Para-Virtualized Hardware, uses para-virtualized drivers for boot and I/O, and uses hardware virtualization extensions for all other tasks, without the need for emulation. The &man.bhyve.8; hypervisor has been updated to support &amd; processors with SVM and AMD-V hardware extensions. The &man.virtio.console.4; driver has been added, which provides an interface to VirtIO console devices through a &man.tty.4; device. The &man.bhyve.8; hypervisor has been updated to support DSM TRIM commands for virtual AHCI disks. Support for the QEMU virt system has been added. The Hyper-V™ drivers have been updated with several enhancements: The &man.hv.vmbus.4; driver now has multi-channel support. The &man.hv.storvsc.4; driver now has scatter/gather support, in addition to performance improvements. The &man.hv.kvp.4; driver has received several bug fixes. Support for &man.xen.4; para-virtualized domU kernels has been removed. The &man.hv.netvsc.4; driver has been updated to support checksum offloading and TSO. + + The &man.xen.4; driver has been updated + to include support for blkif indirect + segment I/O. ARM Support The &man.nand.4; device is enabled for ARM devices by default. Support for the Exynos 5420 Octa system has been added. The SMP option has been enabled for all Exynos 5 systems supported by &os;. Support for the Toradex Apalis i.MX6 development board has been added. An issue that could cause instability when detecting SD cards on the Raspberry Pi SOC has been fixed. The bcm2835_cpufreq driver has been added, which supports CPU frequency and voltage control on the Raspberry Pi SOC. Support to turn off the BeagleBone Black system with the &man.shutdown.8; -p flag or by invoking &man.poweroff.8; has been added. Audio transmission drivers have been added for Digital Audio Multiplexer (AUDMUXM), Smart Direct Memory Access Controller (SDMA), and Synchronous Serial Interface (SSI). Initial support for the ARM AArch64 architecture has been added.
Kernel support for Thumb-2 userland has been added. Support for the hardware power button on the BeagleBone Black system has been added. Initial ACPI support has been added for &os;/&arch.arm64;. + + Support for 1-Wire devices has been + added, providing support for 1-Wire hardware through + &man.gpio.4;. See &man.ow.4;, &man.owc.4;, and + &man.ow.temp.4; for more information. + + Support for the HiSilicon HI6220 SoC has been + added. Storage This section covers changes and additions to file systems and other storage subsystems, both local and networked. General Storage The &man.ctl.4; LUN mapping has been rewritten, replacing iSCSI-specific mapping mechanisms with a new mechanism that works for any port. The &man.ctld.8; utility has been updated to allow controlling non-iSCSI &man.ctl.4; ports. The &man.autofs.5; subsystem has been updated to include a new &man.auto.master.5; map, -media, which allows automatically mounting removable media, such as CD drives or USB flash drives. The &man.autofs.5; subsystem has been updated to include a new &man.auto.master.5; map, -noauto, which handles &man.fstab.5; entries set to noauto. + + The GELI class has + been updated to support the BIO_DELETE + &man.g.bio.9; bio_cmd field, providing + TRIM/UNMAP support on + GELI-backed SSD storage + providers. Networked Storage The new filesystem automount facility, &man.autofs.5;, has been added. The new &man.autofs.5; facility is similar to that found in other &unix;-like operating systems, such as OS X™ and Solaris™. The &man.autofs.5; facility uses a &sun;-compatible &man.auto.master.5; configuration file, and is administered with the &man.automount.8; userland utility, and the &man.automountd.8; and &man.autounmountd.8; daemons. Support for the timeo, actimeo, noac, and proto options have been added to &man.mount.nfs.8;. ZFS The arc_meta_limit statistics are now visible through the kstat &man.sysctl.8;. 
As a result of this change, the vfs.zfs.arc_meta_used &man.sysctl.8; has been removed, and replaced with the kstat.zfs.misc.arcstats.arc_meta_used &man.sysctl.8;. + + The &man.zfs.8; l2arc + code has been updated to take ashift into + account when gathering buffers to be written to the + l2arc device. &man.geom.4; Support for the disklabel64 partitioning scheme has been added to &man.gpart.8;. Support for the apple-boot, apple-hfs, and apple-ufs MBR partitioning schemes has been added to &man.gpart.8;. + + The &man.gpart.8; utility has been + updated to include a new attribute for GPT + partitions, lenovofix, which, when set, + works around BIOS compatibility + issues reported on several Lenovo™ laptops. Boot Loader Changes This section covers the boot loader, boot menu, and other boot-related changes. Boot Loader Changes The memory test run at boot time on &os;/&arch.amd64; platforms has been disabled by default. A new &man.ttys.5; class, 3wire, has been added. This is similar to the existing terminal classes, but does not have a defined baudrate. The &man.vt.4; driver has been made the default system console driver. The &man.syscons.4; driver is still available, and can be enabled by adding kern.vty=sc in &man.loader.conf.5;. Alternatively, &man.syscons.4; can be enabled at boot time by entering set kern.vty=sc at the &man.loader.8; prompt. Support for bzipfs has been added to the EFI loader. The boot loader has been updated to support entering the GELI passphrase before loading the kernel. To enable this behavior, add geom_eli_passphrase_prompt="YES" to &man.loader.conf.5;. The &man.ttys.5; file for &os;/&arch.arm; has been updated to enable ttyu1, ttyu2, and ttyu3 by default, if the callin port is an active console port. Boot Menu Changes   Networking This section describes changes that affect networking in &os;. Network Protocols Support for the IPX network transport protocol has been removed, and will not be supported in &os; 11 and later releases.
Support for PLPMTUD blackhole detection (RFC 4821) has been added to the &man.tcp.4; stack, disabled by default. New control tunables have been added: Tunable Description net.inet.tcp.pmtud_blackhole_detection Enables or disables PLPMTUD blackhole detection net.inet.tcp.pmtud_blackhole_mss MSS to try for IPv4 net.inet.tcp.v6pmtud_blackhole_mss MSS to try for IPv6 New monitoring &man.sysctl.8;s have been added: Tunable Description net.inet.tcp.pmtud_blackhole_activated Number of times the code was activated to attempt downshifting the MSS net.inet.tcp.pmtud_blackhole_min_activated Number of times the blackhole MSS was used in an attempt to downshift net.inet.tcp.pmtud_blackhole_failed Number of times that the blackhole failed to connect after downshifting the MSS Support for IP identification for atomic datagrams (RFC 6864) has been added. Support for this feature can be toggled with the net.inet.ip.rfc6864 &man.sysctl.8;, which is enabled by default. + + The IPSEC has been + updated to include support for AES modes on + both software-only and hardware-backed (&man.aesni.4;) + systems. + + The + network stack has been updated to fix handling of + IPv6 On-Link redirects. Ports Collection and Package Infrastructure This section covers changes to the &os; Ports Collection, package infrastructure, and package maintenance and installation tools. Infrastructure Changes   Packaging Changes   Documentation This section covers changes to the &os; Documentation Project sources and toolchain. Documentation Source Changes   Documentation Toolchain Changes   Release Engineering and Integration This section covers changes that are specific to the &os; Release Engineering processes. Integration Changes The Release Engineering build tools have been updated to include support for producing virtual machine disk images for various cloud hosting providers. The Release Engineering build tools have been updated to use multi-threaded &man.xz.1;.
By default, the number of &man.xz.1; threads is set to the number of cores available. The Release Engineering build tools have been updated to include support for building &os;/&arch.arm64; virtual machine and memory stick installation images. The Release Engineering build tools have been updated to support building &os;/&arch.arm; images without external utilities for supported boards where a corresponding u-boot port exists in the Ports Collection. The &os;/&arch.i386; memory stick installation images are now created using the &man.mkimg.1; utility, matching the way the &os;/&arch.amd64; images are created.
Index: projects/clang370-import/release/doc/share/xml/sponsor.ent =================================================================== --- projects/clang370-import/release/doc/share/xml/sponsor.ent (revision 288925) +++ projects/clang370-import/release/doc/share/xml/sponsor.ent (revision 288926) @@ -1,50 +1,55 @@ + + + + + Index: projects/clang370-import/share/mk/bsd.own.mk =================================================================== --- projects/clang370-import/share/mk/bsd.own.mk (revision 288925) +++ projects/clang370-import/share/mk/bsd.own.mk (revision 288926) @@ -1,252 +1,255 @@ # $FreeBSD$ # # The include file set common variables for owner, # group, mode, and directories. Defaults are in brackets. # # # +++ variables +++ # # DESTDIR Change the tree where the file gets installed. [not set] # # DISTDIR Change the tree where the file for a distribution # gets installed (see /usr/src/release/Makefile). [not set] # # COMPRESS_CMD Program to compress documents. # Output is to stdout. [gzip -cn] # # COMPRESS_EXT File name extension of ${COMPRESS_CMD} command. [.gz] # # BINOWN Binary owner. [root] # # BINGRP Binary group. [wheel] # # BINMODE Binary mode. [555] # # NOBINMODE Mode for non-executable files. [444] # # LIBDIR Base path for libraries. [/usr/lib] # # LIBCOMPATDIR Base path for compat libraries. [/usr/lib/compat] # # LIBDATADIR Base path for misc. utility data files. [/usr/libdata] # # LIBEXECDIR Base path for system daemons and utilities. [/usr/libexec] # # LINTLIBDIR Base path for lint libraries. [/usr/libdata/lint] # # SHLIBDIR Base path for shared libraries. [${LIBDIR}] # # LIBOWN Library owner. [${BINOWN}] # # LIBGRP Library group. [${BINGRP}] # # LIBMODE Library mode. [${NOBINMODE}] # # # DEBUGDIR Base path for standalone debug files. [/usr/lib/debug] # # DEBUGMODE Mode for debug files. [${NOBINMODE}] # # # KMODDIR Base path for loadable kernel modules # (see kld(4)). [/boot/kernel] # # KMODOWN Kernel and KLD owner. 
[${BINOWN}] # # KMODGRP Kernel and KLD group. [${BINGRP}] # # KMODMODE KLD mode. [${BINMODE}] # # # SHAREDIR Base path for architecture-independent ascii # text files. [/usr/share] # # SHAREOWN ASCII text file owner. [root] # # SHAREGRP ASCII text file group. [wheel] # # SHAREMODE ASCII text file mode. [${NOBINMODE}] # # # CONFDIR Base path for configuration files. [/etc] # # CONFOWN Configuration file owner. [root] # # CONFGRP Configuration file group. [wheel] # # CONFMODE Configuration file mode. [644] # # # DOCDIR Base path for system documentation (e.g. PSD, USD, # handbook, FAQ etc.). [${SHAREDIR}/doc] # # DOCOWN Documentation owner. [${SHAREOWN}] # # DOCGRP Documentation group. [${SHAREGRP}] # # DOCMODE Documentation mode. [${NOBINMODE}] # # # INFODIR Base path for GNU's hypertext system # called Info (see info(1)). [${SHAREDIR}/info] # # INFOOWN Info owner. [${SHAREOWN}] # # INFOGRP Info group. [${SHAREGRP}] # # INFOMODE Info mode. [${NOBINMODE}] # # # MANDIR Base path for manual installation. [${SHAREDIR}/man/man] # # MANOWN Manual owner. [${SHAREOWN}] # # MANGRP Manual group. [${SHAREGRP}] # # MANMODE Manual mode. [${NOBINMODE}] # # # NLSDIR Base path for National Language Support files # installation. [${SHAREDIR}/nls] # # NLSOWN National Language Support files owner. [${SHAREOWN}] # # NLSGRP National Language Support files group. [${SHAREGRP}] # # NLSMODE National Language Support files mode. 
[${NOBINMODE}] # # INCLUDEDIR Base path for standard C include files [/usr/include] .if !target(____) ____: .include # options now here or src.opts.mk .if !defined(_WITHOUT_SRCCONF) .if ${MK_CTF} != "no" CTFCONVERT_CMD= ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} .elif defined(.PARSEDIR) || (defined(MAKE_VERSION) && ${MAKE_VERSION} >= 5201111300) CTFCONVERT_CMD= .else CTFCONVERT_CMD= @: .endif .if ${MK_INSTALL_AS_USER} != "no" _uid!= id -u .if ${_uid} != 0 .if !defined(USER) USER!= id -un .endif _gid!= id -g .for x in BIN CONF DOC DTB INFO KMOD LIB MAN NLS SHARE $xOWN= ${USER} $xGRP= ${_gid} .endfor .endif .endif .endif # !_WITHOUT_SRCCONF # Binaries BINOWN?= root BINGRP?= wheel BINMODE?= 555 NOBINMODE?= 444 .if defined(MODULES_WITH_WORLD) KMODDIR?= /boot/modules .else KMODDIR?= /boot/kernel .endif KMODOWN?= ${BINOWN} KMODGRP?= ${BINGRP} KMODMODE?= ${BINMODE} DTBDIR?= /boot/dtb DTBOWN?= root DTBGRP?= wheel DTBMODE?= 444 LIBDIR?= /usr/lib LIBCOMPATDIR?= /usr/lib/compat LIBDATADIR?= /usr/libdata LIBEXECDIR?= /usr/libexec LINTLIBDIR?= /usr/libdata/lint SHLIBDIR?= ${LIBDIR} LIBOWN?= ${BINOWN} LIBGRP?= ${BINGRP} LIBMODE?= ${NOBINMODE} DEBUGDIR?= /usr/lib/debug DEBUGMODE?= ${NOBINMODE} # Share files SHAREDIR?= /usr/share SHAREOWN?= root SHAREGRP?= wheel SHAREMODE?= ${NOBINMODE} CONFDIR?= /etc CONFOWN?= root CONFGRP?= wheel CONFMODE?= 644 MANDIR?= ${SHAREDIR}/man/man MANOWN?= ${SHAREOWN} MANGRP?= ${SHAREGRP} MANMODE?= ${NOBINMODE} DOCDIR?= ${SHAREDIR}/doc DOCOWN?= ${SHAREOWN} DOCGRP?= ${SHAREGRP} DOCMODE?= ${NOBINMODE} INFODIR?= ${SHAREDIR}/info INFOOWN?= ${SHAREOWN} INFOGRP?= ${SHAREGRP} INFOMODE?= ${NOBINMODE} NLSDIR?= ${SHAREDIR}/nls NLSOWN?= ${SHAREOWN} NLSGRP?= ${SHAREGRP} NLSMODE?= ${NOBINMODE} INCLUDEDIR?= /usr/include # # install(1) parameters. 
# HRDLINK?= -l h SYMLINK?= -l s RSYMLINK?= -l rs INSTALL_LINK?= ${INSTALL} ${HRDLINK} INSTALL_SYMLINK?= ${INSTALL} ${SYMLINK} INSTALL_RSYMLINK?= ${INSTALL} ${RSYMLINK} # Common variables .if !defined(DEBUG_FLAGS) STRIP?= -s .endif COMPRESS_CMD?= gzip -cn COMPRESS_EXT?= .gz # Set XZ_THREADS to 1 to disable multi-threading. XZ_THREADS?= 0 .if !empty(XZ_THREADS) XZ_CMD?= xz -T ${XZ_THREADS} .else XZ_CMD?= xz .endif # Pointer to the top directory into which tests are installed. Should not be # overriden by Makefiles, but the user may choose to set this in src.conf(5). TESTSBASE?= /usr/tests -# Compat for the moment +# Compat for the moment -- old bsd.own.mk only included this when _WITHOUT_SRCCONF +# wasn't defined. bsd.ports.mk and friends depend on this behavior. Remove in 12. +.if !defined(_WITHOUT_SRCCONF) .include +.endif # !_WITHOUT_SRCCONF .endif # !target(____) Index: projects/clang370-import/share =================================================================== --- projects/clang370-import/share (revision 288925) +++ projects/clang370-import/share (revision 288926) Property changes on: projects/clang370-import/share ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share:r288831-288925 Index: projects/clang370-import/sys/amd64/amd64/support.S =================================================================== --- projects/clang370-import/sys/amd64/amd64/support.S (revision 288925) +++ projects/clang370-import/sys/amd64/amd64/support.S (revision 288926) @@ -1,796 +1,795 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "opt_ddb.h" #include -#include #include #include "assym.s" .text /* * bcopy family * void bzero(void *buf, u_int len) */ /* done */ ENTRY(bzero) PUSH_FRAME_POINTER movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx cld rep stosq movq %rsi,%rcx andq $7,%rcx rep stosb POP_FRAME_POINTER ret END(bzero) /* Address: %rdi */ ENTRY(pagezero) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rdx subq %rdx,%rdi xorl %eax,%eax 1: movnti %rax,(%rdi,%rdx) movnti %rax,8(%rdi,%rdx) movnti %rax,16(%rdi,%rdx) movnti %rax,24(%rdi,%rdx) addq $32,%rdx jne 1b sfence POP_FRAME_POINTER ret END(pagezero) ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx cld /* compare forwards */ repe cmpsq jne 1f movq %rdx,%rcx andq $7,%rcx repe cmpsb 1: setne %al movsbl %al,%eax POP_FRAME_POINTER ret END(bcmp) /* * bcopy(src, dst, cnt) * rdi, rsi, rdx * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) PUSH_FRAME_POINTER xchgq %rsi,%rdi movq %rdx,%rcx movq %rdi,%rax subq %rsi,%rax cmpq %rcx,%rax /* overlapping && src < dst? */ jb 1f shrq $3,%rcx /* copy by 64-bit words */ cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ std rep movsb movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ cld /* copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? 
*/ rep movsb POP_FRAME_POINTER ret END(memcpy) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rax movq %rax,%rdx subq %rax,%rdi subq %rax,%rsi 1: prefetchnta (%rdi,%rax) addq $64,%rax jne 1b 2: movq (%rdi,%rdx),%rax movnti %rax,(%rsi,%rdx) movq 8(%rdi,%rdx),%rax movnti %rax,8(%rsi,%rdx) movq 16(%rdi,%rdx),%rax movnti %rax,16(%rsi,%rdx) movq 24(%rdi,%rdx),%rax movnti %rax,24(%rsi,%rdx) addq $32,%rdx jne 2b sfence POP_FRAME_POINTER ret END(pagecopy) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx cld rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyout) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. 
*/ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyout_fault xchgq %rdi,%rsi /* bcopy(%rsi, %rdi, %rdx) */ movq %rdx,%rcx shrq $3,%rcx cld rep movsq movb %dl,%cl andb $7,%cl rep movsb done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyout_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyout) /* * copyin(from_user, to_kernel, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyin) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyin /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copyin_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyin_fault xchgq %rdi,%rsi movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ cld rep movsq movb %al,%cl andb $7,%cl /* copy remaining bytes */ rep movsb done_copyin: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyin_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. 
*/ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. * addr = %rdi, valp = %rsi */ ALTENTRY(fueword64) ENTRY(fueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword64) END(fueword) ENTRY(fueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. 
*/ ALTENTRY(suswintr) ENTRY(fuswintr) movq $-1,%rax ret END(suswintr) END(fuswintr) ENTRY(fuword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16) ENTRY(fubyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte) ALIGN_TEXT fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. All these functions are MPSAFE. * addr = %rdi, value = %rsi */ ALTENTRY(suword64) ENTRY(suword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword64) END(suword) ENTRY(suword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32) ENTRY(suword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16) ENTRY(subyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl 
%eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(CURPCB),%rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jae 1f movq %rax,%rdx movq %rax,%r8 1: incq %rdx cld 2: decq %rdx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x 3: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt 4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 jz 1f subq %rdx,%r8 movq %r8,(%r9) 1: POP_FRAME_POINTER ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ xchgq %rdi,%rsi incq %rdx cld 1: decq %rdx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp 6f 4: /* rdx is zero -- return ENAMETOOLONG */ movq $ENAMETOOLONG,%rax 6: testq %rcx,%rcx jz 7f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 7: POP_FRAME_POINTER ret END(copystr) /* * Handling of special amd64 registers and descriptor tables etc * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* 
reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns hi byte in edx, lo in %eax */ salq $32,%rdx /* sign-shift %rdx left */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. 
*/ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts hi byte in edx, lo in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret Index: projects/clang370-import/sys/amd64/include/intr_machdep.h =================================================================== --- projects/clang370-import/sys/amd64/include/intr_machdep.h (revision 288925) +++ projects/clang370-import/sys/amd64/include/intr_machdep.h (revision 288926) @@ -1,188 +1,189 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. * IRQ values from 256 to 767 are used by MSI. When running under the Xen * Hypervisor, IRQ values from 768 to 4863 are available for binding to * event channel events. We leave 255 unused to avoid confusion since 255 is * used in PCI to indicate an invalid IRQ. 
*/ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 #ifdef XENHVM #include +#include #define NUM_EVTCHN_INTS NR_EVENT_CHANNELS #define FIRST_EVTCHN_INT \ (FIRST_MSI_INT + NUM_MSI_INTS) #define LAST_EVTCHN_INT \ (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) #else #define NUM_EVTCHN_INTS 0 #endif #define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 8 counters for each CPU for IPI counters for SMP. */ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); void (*pic_reprogram_pin)(struct intsrc *); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. 
The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; }; struct trapframe; /* * The following data structure holds per-cpu data, and is placed just * above the top of the space used for the NMI stack. */ struct nmi_pcpu { register_t np_pcpu; register_t __padding; /* pad to 16 bytes */ }; extern struct mtx icu_lock; extern int elcr_found; #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. */ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intr_reprogram(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif 
/* !__MACHINE_INTR_MACHDEP_H__ */ Index: projects/clang370-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c =================================================================== --- projects/clang370-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 288925) +++ projects/clang370-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 288926) @@ -1,1448 +1,1464 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. */ #if !defined(lint) static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$Id$"; #endif #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_inet6.h" #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_random_ip_id.h" #endif #include #include #include #include # include # include #include #include # include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #endif # include # include #if !defined(__hpux) # include #endif #include # include # include #include # include # include #include #include #include #include #include #include #include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #else #define CURVNET_SET(arg) #define CURVNET_RESTORE() #endif #if defined(__osf__) # include #endif #include #include #include #include "netinet/ip_compat.h" #ifdef USE_INET6 # include #endif #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" #include "netinet/ip_sync.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #ifdef IPFILTER_SCAN #include "netinet/ip_scan.h" 
#endif #include "netinet/ip_pool.h" # include #include #ifdef CSUM_DATA_VALID #include #endif extern int ip_optcopy __P((struct ip *, struct ip *)); # ifdef IPFILTER_M_IPFILTER MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures"); # endif static int (*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **)); static int ipf_send_ip __P((fr_info_t *, mb_t *)); static void ipf_timer_func __P((void *arg)); int ipf_locks_done = 0; ipf_main_softc_t ipfmain; # include # if defined(NETBSD_PF) # include # endif /* NETBSD_PF */ /* * We provide the ipf_checkp name just to minimize changes later. */ int (*ipf_checkp) __P((void *, ip_t *ip, int hlen, void *ifp, int out, mb_t **mp)); static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag; static void ipf_ifevent(void *arg); static void ipf_ifevent(arg) void *arg; { ipf_sync(arg, NULL); } static int ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { struct ip *ip = mtod(*mp, struct ip *); int rv; /* * IPFilter expects evreything in network byte order */ #if (__FreeBSD_version < 1000019) ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); #endif rv = ipf_check(&ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), mp); #if (__FreeBSD_version < 1000019) if ((rv == 0) && (*mp != NULL)) { ip = mtod(*mp, struct ip *); ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } #endif return rv; } # ifdef USE_INET6 # include static int ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { return (ipf_check(&ipfmain, mtod(*mp, struct ip *), sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp)); } # endif #if defined(IPFILTER_LKM) int ipf_identify(s) char *s; { if (strcmp(s, "ipl") == 0) return 1; return 0; } #endif /* IPFILTER_LKM */ static void ipf_timer_func(arg) void *arg; { ipf_main_softc_t *softc = arg; SPL_INT(s); SPL_NET(s); READ_ENTER(&softc->ipf_global); if (softc->ipf_running > 0) ipf_slowtimer(softc); if 
(softc->ipf_running == -1 || softc->ipf_running == 1) { #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); } RWLOCK_EXIT(&softc->ipf_global); SPL_X(s); } int ipfattach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif SPL_NET(s); if (softc->ipf_running > 0) { SPL_X(s); return EBUSY; } if (ipf_init_all(softc) < 0) { SPL_X(s); return EIO; } if (ipf_checkp != ipf_check) { ipf_savep = ipf_checkp; ipf_checkp = ipf_check; } bzero((char *)ipfmain.ipf_selwait, sizeof(ipfmain.ipf_selwait)); softc->ipf_running = 1; if (softc->ipf_control_forwarding & 1) V_ipforwarding = 1; SPL_X(s); #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); return 0; } /* * Disable the filter by removing the hooks from the IP input/output * stream. */ int ipfdetach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif if (softc->ipf_control_forwarding & 2) V_ipforwarding = 0; SPL_NET(s); #if 0 if (softc->ipf_slow_ch.callout != NULL) untimeout(ipf_timer_func, softc, softc->ipf_slow_ch); bzero(&softc->ipf_slow, sizeof(softc->ipf_slow)); #endif callout_drain(&softc->ipf_slow_ch); #ifndef NETBSD_PF if (ipf_checkp != NULL) ipf_checkp = ipf_savep; ipf_savep = NULL; #endif ipf_fini_all(softc); softc->ipf_running = -2; SPL_X(s); return 0; } /* * Filter ioctl interface. 
*/ int ipfioctl(dev, cmd, data, mode , p) struct thread *p; # define p_cred td_ucred # define p_uid td_ucred->cr_ruid struct cdev *dev; ioctlcmd_t cmd; caddr_t data; int mode; { int error = 0, unit = 0; SPL_INT(s); #if (BSD >= 199306) if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) { ipfmain.ipf_interror = 130001; return EPERM; } #endif unit = GET_MINOR(dev); if ((IPL_LOGMAX < unit) || (unit < 0)) { ipfmain.ipf_interror = 130002; return ENXIO; } if (ipfmain.ipf_running <= 0) { if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) { ipfmain.ipf_interror = 130003; return EIO; } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && cmd != SIOCGETFS && cmd != SIOCGETFF && cmd != SIOCIPFINTERROR) { ipfmain.ipf_interror = 130004; return EIO; } } SPL_NET(s); CURVNET_SET(TD_TO_VNET(p)); error = ipf_ioctlswitch(&ipfmain, unit, data, cmd, mode, p->p_uid, p); CURVNET_RESTORE(); if (error != -1) { SPL_X(s); return error; } SPL_X(s); return error; } /* * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that * requires a large amount of setting up and isn't any more efficient. */ int ipf_send_reset(fin) fr_info_t *fin; { struct tcphdr *tcp, *tcp2; int tlen = 0, hlen; struct mbuf *m; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip; tcp = fin->fin_dp; if (tcp->th_flags & TH_RST) return -1; /* feedback loop */ if (ipf_checkl4sum(fin) == -1) return -1; tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) + ((tcp->th_flags & TH_SYN) ? 1 : 0) + ((tcp->th_flags & TH_FIN) ? 1 : 0); #ifdef USE_INET6 hlen = (fin->fin_v == 6) ? 
sizeof(ip6_t) : sizeof(ip_t); #else hlen = sizeof(ip_t); #endif #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; if (sizeof(*tcp2) + hlen > MLEN) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } } m->m_len = sizeof(*tcp2) + hlen; #if (BSD >= 199103) m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = (struct ifnet *)0; #endif ip = mtod(m, struct ip *); bzero((char *)ip, hlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; #endif tcp2 = (struct tcphdr *)((char *)ip + hlen); tcp2->th_sport = tcp->th_dport; tcp2->th_dport = tcp->th_sport; if (tcp->th_flags & TH_ACK) { tcp2->th_seq = tcp->th_ack; tcp2->th_flags = TH_RST; tcp2->th_ack = 0; } else { tcp2->th_seq = 0; tcp2->th_ack = ntohl(tcp->th_seq); tcp2->th_ack += tlen; tcp2->th_ack = htonl(tcp2->th_ack); tcp2->th_flags = TH_RST|TH_ACK; } TCP_X2_A(tcp2, 0); TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2); tcp2->th_win = tcp->th_win; tcp2->th_sum = 0; tcp2->th_urp = 0; #ifdef USE_INET6 if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_hlim = 0; ip6->ip6_src = fin->fin_dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; tcp2->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*ip6), sizeof(*tcp2)); return ipf_send_ip(fin, m); } #endif ip->ip_p = IPPROTO_TCP; ip->ip_len = htons(sizeof(struct tcphdr)); ip->ip_src.s_addr = fin->fin_daddr; ip->ip_dst.s_addr = fin->fin_saddr; tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2)); ip->ip_len = htons(hlen + sizeof(*tcp2)); return ipf_send_ip(fin, m); } /* * ip_len must be in network byte order when called. 
*/ static int ipf_send_ip(fin, m) fr_info_t *fin; mb_t *m; { fr_info_t fnew; ip_t *ip, *oip; int hlen; ip = mtod(m, ip_t *); bzero((char *)&fnew, sizeof(fnew)); fnew.fin_main_soft = fin->fin_main_soft; IP_V_A(ip, fin->fin_v); switch (fin->fin_v) { case 4 : oip = fin->fin_ip; hlen = sizeof(*oip); fnew.fin_v = 4; fnew.fin_p = ip->ip_p; fnew.fin_plen = ntohs(ip->ip_len); IP_HL_A(ip, sizeof(*oip) >> 2); ip->ip_tos = oip->ip_tos; ip->ip_id = fin->fin_ip->ip_id; #if defined(FreeBSD) && (__FreeBSD_version > 460000) ip->ip_off = htons(path_mtu_discovery ? IP_DF : 0); #else ip->ip_off = 0; #endif ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; break; #ifdef USE_INET6 case 6 : { ip6_t *ip6 = (ip6_t *)ip; ip6->ip6_vfc = 0x60; ip6->ip6_hlim = IPDEFTTL; hlen = sizeof(*ip6); fnew.fin_p = ip6->ip6_nxt; fnew.fin_v = 6; fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; break; } #endif default : return EINVAL; } #ifdef IPSEC m->m_pkthdr.rcvif = NULL; #endif fnew.fin_ifp = fin->fin_ifp; fnew.fin_flx = FI_NOCKSUM; fnew.fin_m = m; fnew.fin_ip = ip; fnew.fin_mp = &m; fnew.fin_hlen = hlen; fnew.fin_dp = (char *)ip + hlen; (void) ipf_makefrip(hlen, ip, &fnew); return ipf_fastroute(m, &m, &fnew, NULL); } int ipf_send_icmp_err(type, fin, dst) int type; fr_info_t *fin; int dst; { int err, hlen, xtra, iclen, ohlen, avail, code; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip, *ip2; if ((type < 0) || (type >= ICMP_MAXTYPE)) return -1; code = fin->fin_icode; #ifdef USE_INET6 #if 0 /* XXX Fix an off by one error: s/>/>=/ was: if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int))) Fix obtained from NetBSD ip_fil_netbsd.c r1.4: */ #endif if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return -1; #endif if (ipf_checkl4sum(fin) == -1) return -1; #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; avail = MHLEN; xtra = 0; hlen = 0; ohlen 
= 0; dst4.s_addr = 0; ifp = fin->fin_ifp; if (fin->fin_v == 4) { if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT)) switch (ntohs(fin->fin_data[0]) >> 8) { case ICMP_ECHO : case ICMP_TSTAMP : case ICMP_IREQ : case ICMP_MASKREQ : break; default : FREE_MB_T(m); return 0; } if (dst == 0) { if (ipf_ifpaddr(&ipfmain, 4, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } dst4 = dst6.in4; } else dst4.s_addr = fin->fin_daddr; hlen = sizeof(ip_t); ohlen = fin->fin_hlen; iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; if (fin->fin_hlen < fin->fin_plen) xtra = MIN(fin->fin_dlen, 8); else xtra = 0; } #ifdef USE_INET6 else if (fin->fin_v == 6) { hlen = sizeof(ip6_t); ohlen = sizeof(ip6_t); iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; type = icmptoicmp6types[type]; if (type == ICMP6_DST_UNREACH) code = icmptoicmp6unreach[code]; if (iclen + max_linkhdr + fin->fin_plen > avail) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } avail = MCLBYTES; } xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr); xtra = MIN(xtra, IPV6_MMTU - iclen); if (dst == 0) { if (ipf_ifpaddr(&ipfmain, 6, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } } else dst6 = fin->fin_dst6; } #endif else { FREE_MB_T(m); return -1; } avail -= (max_linkhdr + iclen); if (avail < 0) { FREE_MB_T(m); return -1; } if (xtra > avail) xtra = avail; iclen += xtra; m->m_data += max_linkhdr; m->m_pkthdr.rcvif = (struct ifnet *)0; m->m_pkthdr.len = iclen; m->m_len = iclen; ip = mtod(m, ip_t *); icmp = (struct icmp *)((char *)ip + hlen); ip2 = (ip_t *)&icmp->icmp_ip; icmp->icmp_type = type; icmp->icmp_code = fin->fin_icode; icmp->icmp_cksum = 0; #ifdef icmp_nextmtu if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) { if (fin->fin_mtu != 0) { icmp->icmp_nextmtu = htons(fin->fin_mtu); } else if (ifp != NULL) { icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp)); } else { /* make up a number... 
*/ icmp->icmp_nextmtu = htons(fin->fin_plen - 20); } } #endif bcopy((char *)fin->fin_ip, (char *)ip2, ohlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(iclen - hlen); ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 0; ip6->ip6_src = dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), iclen - hlen); } else #endif { ip->ip_p = IPPROTO_ICMP; ip->ip_src.s_addr = dst4.s_addr; ip->ip_dst.s_addr = fin->fin_saddr; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = ipf_cksum((u_short *)icmp, sizeof(*icmp) + 8); ip->ip_len = htons(iclen); ip->ip_p = IPPROTO_ICMP; } err = ipf_send_ip(fin, m); return err; } /* * m0 - pointer to mbuf where the IP packet starts * mpp - pointer to the mbuf pointer that is the start of the mbuf chain */ int ipf_fastroute(m0, mpp, fin, fdp) mb_t *m0, **mpp; fr_info_t *fin; frdest_t *fdp; { register struct ip *ip, *mhip; register struct mbuf *m = *mpp; register struct route *ro; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; struct sockaddr_in *dst; struct route iproute; u_short ip_off; frdest_t node; frentry_t *fr; ro = NULL; #ifdef M_WRITABLE /* * HOT FIX/KLUDGE: * * If the mbuf we're about to send is not writable (because of * a cluster reference, for example) we'll need to make a copy * of it since this routine modifies the contents. * * If you have non-crappy network hardware that can transmit data * from the mbuf, rather than making a copy, this is gonna be a * problem. 
*/ if (M_WRITABLE(m) == 0) { m0 = m_dup(m, M_NOWAIT); if (m0 != 0) { FREE_MB_T(m); m = m0; *mpp = m; } else { error = ENOBUFS; FREE_MB_T(m); goto done; } } #endif #ifdef USE_INET6 if (fin->fin_v == 6) { /* * currently "to " and "to :ip#" are not supported * for IPv6 */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif hlen = fin->fin_hlen; ip = mtod(m0, struct ip *); ifp = NULL; /* * Route packet. */ ro = &iproute; bzero(ro, sizeof (*ro)); dst = (struct sockaddr_in *)&ro->ro_dst; dst->sin_family = AF_INET; dst->sin_addr = ip->ip_dst; fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && (fdp->fd_type == FRD_DSTLIST)) { if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0) fdp = &node; } if (fdp != NULL) ifp = fdp->fd_ptr; else ifp = fin->fin_ifp; if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) { error = -2; goto bad; } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) dst->sin_addr = fdp->fd_ip; dst->sin_len = sizeof(*dst); in_rtalloc(ro, M_GETFIB(m0)); if ((ifp == NULL) && (ro->ro_rt != NULL)) ifp = ro->ro_rt->rt_ifp; if ((ro->ro_rt == NULL) || (ifp == NULL)) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; else error = ENETUNREACH; goto bad; } if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; if (ro->ro_rt) counter_u64_add(ro->ro_rt->rt_pksent, 1); /* * For input packets which are being "fastrouted", they won't * go back through output filtering and miss their chance to get * NAT'd and counted. Duplicated packets aren't considered to be * part of the normal packet stream, so do not NAT them or pass * them through stateful checking, etc. 
*/ if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) { sifp = fin->fin_ifp; fin->fin_ifp = ifp; fin->fin_out = 1; (void) ipf_acctpkt(fin, NULL); fin->fin_fr = NULL; if (!fr || !(fr->fr_flags & FR_RETMASK)) { u_32_t pass; (void) ipf_state_check(fin, &pass); } switch (ipf_nat_checkout(fin, NULL)) { case 0 : break; case 1 : ip->ip_sum = 0; break; case -1 : error = -1; goto bad; break; } fin->fin_ifp = sifp; fin->fin_out = 0; } else ip->ip_sum = 0; /* * If small enough for interface, can just send directly. */ if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro ); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { error = EMSGSIZE; goto bad; } len = (ifp->if_mtu - hlen) &~ 7; if (len < 8) { error = EMSGSIZE; goto bad; } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_act; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == 0) { m = m0; error = ENOBUFS; goto bad; } m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); bcopy((char *)ip, (char *)mhip, sizeof(*ip)); if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); IP_HL_A(mhip, mhlen >> 2); } m->m_len = mhlen; mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ntohs(ip->ip_len)) len = ntohs(ip->ip_len) - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); *mnext = m; m->m_next = m_copy(m0, off, len); if (m->m_next == 0) { error = ENOBUFS; /* ??? 
*/ goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = NULL; mhip->ip_off = htons((u_short)mhip->ip_off); mhip->ip_sum = 0; mhip->ip_sum = in_cksum(m, mhlen); mnext = &m->m_act; } /* * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). */ m_adj(m0, hlen + firstlen - ip->ip_len); ip->ip_len = htons((u_short)(hlen + firstlen)); ip->ip_off = htons((u_short)IP_MF); ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, hlen); sendorfree: for (m = m0; m; m = m0) { m0 = m->m_act; m->m_act = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro ); else FREE_MB_T(m); } } done: if (!error) ipfmain.ipf_frouteok[0]++; else ipfmain.ipf_frouteok[1]++; if ((ro != NULL) && (ro->ro_rt != NULL)) { RTFREE(ro->ro_rt); } return 0; bad: if (error == EMSGSIZE) { sifp = fin->fin_ifp; code = fin->fin_icode; fin->fin_icode = ICMP_UNREACH_NEEDFRAG; fin->fin_ifp = ifp; (void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1); fin->fin_ifp = sifp; fin->fin_icode = code; } FREE_MB_T(m); goto done; } int ipf_verifysrc(fin) fr_info_t *fin; { struct sockaddr_in *dst; struct route iproute; bzero((char *)&iproute, sizeof(iproute)); dst = (struct sockaddr_in *)&iproute.ro_dst; dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; dst->sin_addr = fin->fin_src; in_rtalloc(&iproute, 0); if (iproute.ro_rt == NULL) return 0; return (fin->fin_ifp == iproute.ro_rt->rt_ifp); } /* * return the first IP Address associated with an interface */ int ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) ipf_main_softc_t *softc; int v, atype; void *ifptr; i6addr_t *inp, *inpmask; { #ifdef USE_INET6 struct in6_addr *inp6 = NULL; #endif struct sockaddr *sock, *mask; struct sockaddr_in *sin; struct ifaddr *ifa; struct ifnet *ifp; if ((ifptr == NULL) || (ifptr == (void *)-1)) return -1; sin = NULL; ifp = ifptr; if (v == 4) inp->in4.s_addr = 0; #ifdef USE_INET6 else if (v == 6) bzero((char *)inp, sizeof(*inp)); #endif ifa = 
TAILQ_FIRST(&ifp->if_addrhead); sock = ifa->ifa_addr; while (sock != NULL && ifa != NULL) { sin = (struct sockaddr_in *)sock; if ((v == 4) && (sin->sin_family == AF_INET)) break; #ifdef USE_INET6 if ((v == 6) && (sin->sin_family == AF_INET6)) { inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(inp6) && !IN6_IS_ADDR_LOOPBACK(inp6)) break; } #endif ifa = TAILQ_NEXT(ifa, ifa_link); if (ifa != NULL) sock = ifa->ifa_addr; } if (ifa == NULL || sin == NULL) return -1; mask = ifa->ifa_netmask; if (atype == FRI_BROADCAST) sock = ifa->ifa_broadaddr; else if (atype == FRI_PEERADDR) sock = ifa->ifa_dstaddr; if (sock == NULL) return -1; #ifdef USE_INET6 if (v == 6) { return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock, (struct sockaddr_in6 *)mask, inp, inpmask); } #endif return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock, (struct sockaddr_in *)mask, &inp->in4, &inpmask->in4); } u_32_t ipf_newisn(fin) fr_info_t *fin; { u_32_t newiss; newiss = arc4random(); return newiss; } INLINE int ipf_checkv4sum(fin) fr_info_t *fin; { #ifdef CSUM_DATA_VALID int manual = 0; u_short sum; ip_t *ip; mb_t *m; if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 
0 : -1; m = fin->fin_m; if (m == NULL) { manual = 1; goto skipauto; } ip = fin->fin_ip; if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) == CSUM_IP_CHECKED) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; return -1; } if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + /* Depending on the driver, UDP may have zero checksum */ + if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx & + (FI_FRAG|FI_SHORT|FI_BAD)) == 0) { + udphdr_t *udp = fin->fin_dp; + if (udp->uh_sum == 0) { + /* + * we're good no matter what the hardware + * checksum flags and csum_data say (handling + * of csum_data for zero UDP checksum is not + * consistent across all drivers) + */ + fin->fin_cksum = 1; + return 0; + } + } + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) sum = m->m_pkthdr.csum_data; else sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + fin->fin_dlen + fin->fin_p)); sum ^= 0xffff; if (sum != 0) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; } else { fin->fin_cksum = FI_CK_SUMOK; return 0; } } else { if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) { fin->fin_cksum = FI_CK_L4FULL; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_TCP || m->m_pkthdr.csum_flags == CSUM_UDP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_IP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else { manual = 1; } } skipauto: if (manual != 0) { if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; return -1; } } #else if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; return -1; } #endif return 0; } #ifdef USE_INET6 INLINE int ipf_checkv6sum(fin) fr_info_t *fin; { if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 
0 : -1; if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; return -1; } return 0; } #endif /* USE_INET6 */ size_t mbufchainlen(m0) struct mbuf *m0; { size_t len; if ((m0->m_flags & M_PKTHDR) != 0) { len = m0->m_pkthdr.len; } else { struct mbuf *m; for (m = m0, len = 0; m != NULL; m = m->m_next) len += m->m_len; } return len; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pullup */ /* Returns: NULL == pullup failed, else pointer to protocol header */ /* Parameters: xmin(I)- pointer to buffer where data packet starts */ /* fin(I) - pointer to packet information */ /* len(I) - number of bytes to pullup */ /* */ /* Attempt to move at least len bytes (from the start of the buffer) into a */ /* single buffer for ease of access. Operating system native functions are */ /* used to manage buffers - if necessary. If the entire packet ends up in */ /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */ /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ /* and ONLY if the pullup succeeds. */ /* */ /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */ /* of buffers that starts at *fin->fin_mp. */ /* ------------------------------------------------------------------------ */ void * ipf_pullup(xmin, fin, len) mb_t *xmin; fr_info_t *fin; int len; { int dpoff, ipoff; mb_t *m = xmin; char *ip; if (m == NULL) return NULL; ip = (char *)fin->fin_ip; if ((fin->fin_flx & FI_COALESCE) != 0) return ip; ipoff = fin->fin_ipoff; if (fin->fin_dp != NULL) dpoff = (char *)fin->fin_dp - (char *)ip; else dpoff = 0; if (M_LEN(m) < len) { mb_t *n = *fin->fin_mp; /* * Assume that M_PKTHDR is set and just work with what is left * rather than check.. * Should not make any real difference, anyway. */ if (m != n) { /* * Record the mbuf that points to the mbuf that we're * about to go to work on so that we can update the * m_next appropriately later. 
*/ for (; n->m_next != m; n = n->m_next) ; } else { n = NULL; } #ifdef MHLEN if (len > MHLEN) #else if (len > MLEN) #endif { #ifdef HAVE_M_PULLDOWN if (m_pulldown(m, 0, len, NULL) == NULL) m = NULL; #else FREE_MB_T(*fin->fin_mp); m = NULL; n = NULL; #endif } else { m = m_pullup(m, len); } if (n != NULL) n->m_next = m; if (m == NULL) { /* * When n is non-NULL, it indicates that m pointed to * a sub-chain (tail) of the mbuf and that the head * of this chain has not yet been free'd. */ if (n != NULL) { FREE_MB_T(*fin->fin_mp); } *fin->fin_mp = NULL; fin->fin_m = NULL; return NULL; } if (n == NULL) *fin->fin_mp = m; while (M_LEN(m) == 0) { m = m->m_next; } fin->fin_m = m; ip = MTOD(m, char *) + ipoff; fin->fin_ip = (ip_t *)ip; if (fin->fin_dp != NULL) fin->fin_dp = (char *)fin->fin_ip + dpoff; if (fin->fin_fraghdr != NULL) fin->fin_fraghdr = (char *)ip + ((char *)fin->fin_fraghdr - (char *)fin->fin_ip); } if (len == fin->fin_plen) fin->fin_flx |= FI_COALESCE; return ip; } int ipf_inject(fin, m) fr_info_t *fin; mb_t *m; { int error = 0; if (fin->fin_out == 0) { netisr_dispatch(NETISR_IP, m); } else { fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len); fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); } return error; } int ipf_pfil_unhook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif #ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (ph_inet != NULL) pfil_remove_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (ph_inet6 != NULL) pfil_remove_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif #endif return (0); } int ipf_pfil_hook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head 
*ph_inet6; # endif #endif # ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); # endif if (ph_inet == NULL # ifdef USE_INET6 && ph_inet6 == NULL # endif ) { return ENODEV; } if (ph_inet != NULL) pfil_add_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 if (ph_inet6 != NULL) pfil_add_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif # endif return (0); } void ipf_event_reg(void) { ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \ ipf_ifevent, &ipfmain, \ EVENTHANDLER_PRI_ANY); ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \ ipf_ifevent, &ipfmain, \ EVENTHANDLER_PRI_ANY); ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event, ipf_ifevent, \ &ipfmain, EVENTHANDLER_PRI_ANY); } void ipf_event_dereg(void) { if (ipf_arrivetag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag); } if (ipf_departtag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag); } if (ipf_clonetag != NULL) { EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag); } } u_32_t ipf_random() { return arc4random(); } u_int ipf_pcksum(fin, hlen, sum) fr_info_t *fin; int hlen; u_int sum; { struct mbuf *m; u_int sum2; int off; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; m->m_data += hlen; m->m_len -= hlen; sum2 = in_cksum(fin->fin_m, fin->fin_plen - off); m->m_len += hlen; m->m_data -= hlen; /* * Both sum and sum2 are partial sums, so combine them together. 
*/ sum += ~sum2 & 0xffff; while (sum > 0xffff) sum = (sum & 0xffff) + (sum >> 16); sum2 = ~sum & 0xffff; return sum2; } Property changes on: projects/clang370-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c:r283596-287673,287676-288925 Index: projects/clang370-import/sys/contrib/ipfilter =================================================================== --- projects/clang370-import/sys/contrib/ipfilter (revision 288925) +++ projects/clang370-import/sys/contrib/ipfilter (revision 288926) Property changes on: projects/clang370-import/sys/contrib/ipfilter ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/ipfilter:r288457-288925 Index: projects/clang370-import/sys/dev/xen/blkfront/blkfront.c =================================================================== --- projects/clang370-import/sys/dev/xen/blkfront/blkfront.c (revision 288925) +++ projects/clang370-import/sys/dev/xen/blkfront/blkfront.c (revision 288926) @@ -1,1571 +1,1571 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. 
Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /*--------------------------- Forward Declarations ---------------------------*/ static void xbd_closing(device_t); static void xbd_startio(struct xbd_softc *sc); /*---------------------------------- Macros ----------------------------------*/ #if 0 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) 
#endif

/* Sectors are counted in units of 1 << 9 = 512 bytes. */
#define XBD_SECTOR_SHFT 9

/*---------------------------- Global Static Data ----------------------------*/
static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");

/* Backs the hw.xbd.xbd_enable_indirect sysctl registered below; default 1. */
static int xbd_enable_indirect = 1;
SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters");
SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN,
    &xbd_enable_indirect, 0, "Enable xbd indirect segments");

/*---------------------------- Command Processing ----------------------------*/
/*
 * Take one freeze reference on the device queue.
 *
 * When xbd_flag is not XBDF_NONE and is already set in sc->xbd_flags the
 * call does nothing, so a named flag contributes at most one reference to
 * xbd_qfrozen_cnt.  XBDF_NONE always adds a reference.
 */
static void
xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
{
	if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0)
		return;

	sc->xbd_flags |= xbd_flag;
	sc->xbd_qfrozen_cnt++;
}

/*
 * Drop one freeze reference on the device queue.
 *
 * When xbd_flag is not XBDF_NONE and is not currently set the call does
 * nothing.  Thawing while xbd_qfrozen_cnt is already zero panics.
 */
static void
xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
{
	if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0)
		return;

	if (sc->xbd_qfrozen_cnt == 0)
		panic("%s: Thaw with flag 0x%x while not frozen.",
		    __func__, xbd_flag);

	sc->xbd_flags &= ~xbd_flag;
	sc->xbd_qfrozen_cnt--;
}

/*
 * Freeze the queue on behalf of a single command.
 *
 * Marks the command with XBDCF_FROZEN plus the caller's cm_flag and takes an
 * unnamed (XBDF_NONE) freeze reference.  A command that is already marked
 * XBDCF_FROZEN is left untouched.
 */
static void
xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
{
	if ((cm->cm_flags & XBDCF_FROZEN) != 0)
		return;

	cm->cm_flags |= XBDCF_FROZEN|cm_flag;
	xbd_freeze(sc, XBDF_NONE);
}

/*
 * Release the freeze reference held by a command, if it holds one.
 *
 * Only XBDCF_FROZEN is cleared here; the cm_flag that was OR-ed in by
 * xbd_cm_freeze() is left set.
 */
static void
xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
{
	if ((cm->cm_flags & XBDCF_FROZEN) == 0)
		return;

	cm->cm_flags &= ~XBDCF_FROZEN;
	xbd_thaw(sc, XBDF_NONE);
}

/*
 * Publish the requests queued on the ring and signal the interrupt handle
 * when the ring macro reports that a notification is required.
 */
static inline void
xbd_flush_requests(struct xbd_softc *sc)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify);

	if (notify)
		xen_intr_signal(sc->xen_intr_handle);
}

/*
 * Return a command to the free queue.
 *
 * The command must not be on any queue (asserted).  Its flags, bio pointer
 * and completion callback are reset, it is placed on XBD_Q_FREE, and any
 * XBDF_CM_SHORTAGE freeze on the owning softc is released.
 */
static void
xbd_free_command(struct xbd_command *cm)
{

	KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE,
	    ("Freeing command that is still on queue %d.",
	    cm->cm_flags & XBDCF_Q_MASK));

	cm->cm_flags = XBDCF_INITIALIZER;
	cm->cm_bp = NULL;
	cm->cm_complete = NULL;
	xbd_enqueue_cm(cm, XBD_Q_FREE);
	xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE);
}

static void
xbd_mksegarray(bus_dma_segment_t *segs, int nsegs,
    grant_ref_t * gref_head, int otherend_id,
int readonly, - grant_ref_t * sg_ref, blkif_request_segment_t * sg) + grant_ref_t * sg_ref, struct blkif_request_segment *sg) { struct blkif_request_segment *last_block_sg = sg + nsegs; vm_paddr_t buffer_ma; uint64_t fsect, lsect; int ref; while (sg < last_block_sg) { buffer_ma = segs->ds_addr; fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; KASSERT(lsect <= 7, ("XEN disk driver data cannot " "cross a page boundary")); /* install a grant reference. */ ref = gnttab_claim_grant_reference(gref_head); /* * GNTTAB_LIST_END == 0xffffffff, but it is private * to gnttab.c. */ KASSERT(ref != ~0, ("grant_reference failed")); gnttab_grant_foreign_access_ref( ref, otherend_id, buffer_ma >> PAGE_SHIFT, readonly); *sg_ref = ref; *sg = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; sg++; sg_ref++; segs++; } } static void xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct xbd_softc *sc; struct xbd_command *cm; int op; cm = arg; sc = cm->cm_sc; if (error) { cm->cm_bp->bio_error = EIO; biodone(cm->cm_bp); xbd_free_command(cm); return; } KASSERT(nsegs <= sc->xbd_max_request_segments, ("Too many segments in a blkfront I/O")); if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) { blkif_request_t *ring_req; /* Fill out a blkif_request_t structure. */ ring_req = (blkif_request_t *) RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; ring_req->id = cm->cm_id; ring_req->operation = cm->cm_operation; ring_req->sector_number = cm->cm_sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; ring_req->nr_segments = nsegs; cm->cm_nseg = nsegs; xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, xenbus_get_otherend_id(sc->xbd_dev), cm->cm_operation == BLKIF_OP_WRITE, cm->cm_sg_refs, ring_req->seg); } else { blkif_request_indirect_t *ring_req; /* Fill out a blkif_request_indirect_t structure. 
*/ ring_req = (blkif_request_indirect_t *) RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; ring_req->id = cm->cm_id; ring_req->operation = BLKIF_OP_INDIRECT; ring_req->indirect_op = cm->cm_operation; ring_req->sector_number = cm->cm_sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; ring_req->nr_segments = nsegs; cm->cm_nseg = nsegs; xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, xenbus_get_otherend_id(sc->xbd_dev), cm->cm_operation == BLKIF_OP_WRITE, cm->cm_sg_refs, cm->cm_indirectionpages); memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs, sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages); } if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_PREREAD; else if (cm->cm_operation == BLKIF_OP_WRITE) op = BUS_DMASYNC_PREWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); gnttab_free_grant_references(cm->cm_gref_head); xbd_enqueue_cm(cm, XBD_Q_BUSY); /* * If bus dma had to asynchronously call us back to dispatch * this command, we are no longer executing in the context of * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to * xbd_flush_requests() to publish this command to the backend * along with any other commands that it could batch. */ if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) xbd_flush_requests(sc); return; } static int xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) { int error; error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, cm->cm_datalen, xbd_queue_cb, cm, 0); if (error == EINPROGRESS) { /* * Maintain queuing order by freezing the queue. The next * command may not require as many resources as the command * we just attempted to map, so we can't rely on bus dma * blocking for it too. 
*/
		xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
		return (0);
	}

	return (error);
}

/*
 * Callback registered with gnttab_request_free_callback() by
 * xbd_bio_command().  Takes the I/O lock, releases the XBDF_GNT_SHORTAGE
 * freeze and restarts request dispatch.
 */
static void
xbd_restart_queue_callback(void *arg)
{
	struct xbd_softc *sc = arg;

	mtx_lock(&sc->xbd_io_lock);

	xbd_thaw(sc, XBDF_GNT_SHORTAGE);

	xbd_startio(sc);

	mtx_unlock(&sc->xbd_io_lock);
}

/*
 * Build a command for the next queued bio.
 *
 * Returns NULL when the device is not connected, when no bio is pending,
 * when no free command or grant references are available (the bio is
 * requeued and the queue frozen with the matching shortage flag), or when an
 * ordered write has to wait for in-flight requests (the command is parked on
 * XBD_Q_READY and the queue frozen with XBDF_WAIT_IDLE).  Otherwise returns
 * the command with its bio, data pointer, length, sector and operation set.
 * Panics on a flush request without flush/barrier support and on an unknown
 * bio command.
 */
static struct xbd_command *
xbd_bio_command(struct xbd_softc *sc)
{
	struct xbd_command *cm;
	struct bio *bp;

	if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED))
		return (NULL);

	bp = xbd_dequeue_bio(sc);
	if (bp == NULL)
		return (NULL);

	if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) {
		/* Thawed again by xbd_free_command(). */
		xbd_freeze(sc, XBDF_CM_SHORTAGE);
		xbd_requeue_bio(sc, bp);
		return (NULL);
	}

	if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
	    &cm->cm_gref_head) != 0) {
		/* Thawed again by xbd_restart_queue_callback(). */
		gnttab_request_free_callback(&sc->xbd_callback,
		    xbd_restart_queue_callback, sc,
		    sc->xbd_max_request_segments);
		xbd_freeze(sc, XBDF_GNT_SHORTAGE);
		xbd_requeue_bio(sc, bp);
		xbd_enqueue_cm(cm, XBD_Q_FREE);
		return (NULL);
	}

	cm->cm_bp = bp;
	cm->cm_data = bp->bio_data;
	cm->cm_datalen = bp->bio_bcount;
	cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;

	switch (bp->bio_cmd) {
	case BIO_READ:
		cm->cm_operation = BLKIF_OP_READ;
		break;
	case BIO_WRITE:
		cm->cm_operation = BLKIF_OP_WRITE;
		if ((bp->bio_flags & BIO_ORDERED) != 0) {
			if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
				cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
			} else {
				/*
				 * Single step this command.
				 */
				cm->cm_flags |= XBDCF_Q_FREEZE;
				if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
					/*
					 * Wait for in-flight requests to
					 * finish.
					 */
					xbd_freeze(sc, XBDF_WAIT_IDLE);
					xbd_requeue_cm(cm, XBD_Q_READY);
					return (NULL);
				}
			}
		}
		break;
	case BIO_FLUSH:
		/* Prefer a cache flush; fall back to a write barrier. */
		if ((sc->xbd_flags & XBDF_FLUSH) != 0)
			cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
		else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
			cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
		else
			panic("flush request, but no flush support available");
		break;
	default:
		panic("unknown bio command %d", bp->bio_cmd);
	}

	return (cm);
}

/*
 * Dequeue buffers and place them in the shared communication ring.
* Return when no more requests can be accepted or all buffers have
 * been queued.
 *
 * Signal XEN once the ring has been filled out.
 *
 * Must be called with xbd_io_lock held (asserted).  Does nothing unless the
 * device is in XBD_STATE_CONNECTED.
 */
static void
xbd_startio(struct xbd_softc *sc)
{
	struct xbd_command *cm;
	int error, queued = 0;

	mtx_assert(&sc->xbd_io_lock, MA_OWNED);

	if (sc->xbd_state != XBD_STATE_CONNECTED)
		return;

	while (!RING_FULL(&sc->xbd_ring)) {

		/* Any outstanding freeze reference stops dispatch. */
		if (sc->xbd_qfrozen_cnt != 0)
			break;

		/* Previously prepared commands go first, then new bios. */
		cm = xbd_dequeue_cm(sc, XBD_Q_READY);

		if (cm == NULL)
		    cm = xbd_bio_command(sc);

		if (cm == NULL)
			break;

		if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
			/*
			 * Single step command.  Future work is
			 * held off until this command completes.
			 */
			xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
		}

		/*
		 * NOTE(review): on failure the command is neither requeued
		 * nor freed on this path - confirm that is intended.
		 */
		if ((error = xbd_queue_request(sc, cm)) != 0) {
			printf("xbd_queue_request returned %d\n", error);
			break;
		}
		queued++;
	}

	if (queued != 0)
		xbd_flush_requests(sc);
}

/*
 * Finish the bio attached to a completed command.
 *
 * A status other than BLKIF_RSP_OKAY is logged and flags the bio with
 * BIO_ERROR.  A bio flagged BIO_ERROR gets bio_error = EIO, otherwise its
 * residual is cleared.  The command is freed before biodone() is called.
 */
static void
xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm)
{
	struct bio *bp;

	bp = cm->cm_bp;

	if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) {
		disk_err(bp, "disk error" , -1, 0);
		printf(" status: %x\n", cm->cm_status);
		bp->bio_flags |= BIO_ERROR;
	}

	if (bp->bio_flags & BIO_ERROR)
		bp->bio_error = EIO;
	else
		bp->bio_resid = 0;

	xbd_free_command(cm);
	biodone(bp);
}

static void
xbd_int(void *xsc)
{
	struct xbd_softc *sc = xsc;
	struct xbd_command *cm;
	blkif_response_t *bret;
	RING_IDX i, rp;
	int op;

	mtx_lock(&sc->xbd_io_lock);

	if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) {
		mtx_unlock(&sc->xbd_io_lock);
		return;
	}

again:
	rp = sc->xbd_ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'.
*/ for (i = sc->xbd_ring.rsp_cons; i != rp;) { bret = RING_GET_RESPONSE(&sc->xbd_ring, i); cm = &sc->xbd_shadow[bret->id]; xbd_remove_cm(cm, XBD_Q_BUSY); gnttab_end_foreign_access_references(cm->cm_nseg, cm->cm_sg_refs); i++; if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_POSTREAD; else if (cm->cm_operation == BLKIF_OP_WRITE || cm->cm_operation == BLKIF_OP_WRITE_BARRIER) op = BUS_DMASYNC_POSTWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); /* * Release any hold this command has on future command * dispatch. */ xbd_cm_thaw(sc, cm); /* * Directly call the i/o complete routine to save an * an indirection in the common case. */ cm->cm_status = bret->status; if (cm->cm_bp) xbd_bio_complete(sc, cm); else if (cm->cm_complete != NULL) cm->cm_complete(cm); else xbd_free_command(cm); } sc->xbd_ring.rsp_cons = i; if (i != sc->xbd_ring.req_prod_pvt) { int more_to_do; RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); if (more_to_do) goto again; } else { sc->xbd_ring.sring->rsp_event = i + 1; } if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) xbd_thaw(sc, XBDF_WAIT_IDLE); xbd_startio(sc); if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); mtx_unlock(&sc->xbd_io_lock); } /*------------------------------- Dump Support -------------------------------*/ /** * Quiesce the disk writes for a dump file before allowing the next buffer. */ static void xbd_quiesce(struct xbd_softc *sc) { int mtd; // While there are outstanding requests while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); if (mtd) { /* Recieved request completions, update queue. */ xbd_int(sc); } if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Still pending requests, wait for the disk i/o * to complete. 
*/
			HYPERVISOR_yield();
		}
	}
}

/* Kernel dump function for a paravirtualized disk device */
/*
 * Completion callback installed on dump commands by xbd_dump(): moves the
 * finished command to XBD_Q_COMPLETE so its status can be inspected later.
 */
static void
xbd_dump_complete(struct xbd_command *cm)
{

	xbd_enqueue_cm(cm, XBD_Q_COMPLETE);
}

/*
 * Write `length' bytes starting at `virtual' to the disk at byte `offset'.
 * The buffer is split into commands of at most xbd_max_request_size bytes,
 * each queued on XBD_Q_READY with xbd_dump_complete() as its callback.
 * Returns EBUSY when no free command or grant references are available.
 */
static int
xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp = arg;
	struct xbd_softc *sc = dp->d_drv1;
	struct xbd_command *cm;
	size_t chunk;
	int sbp;
	int rc = 0;

	if (length <= 0)
		return (rc);

	xbd_quiesce(sc);	/* All quiet on the western front. */

	/*
	 * If this lock is held, then this module is failing, and a
	 * successful kernel dump is highly unlikely anyway.
	 */
	mtx_lock(&sc->xbd_io_lock);

	/* Split the 64KB block as needed */
	for (sbp=0; length > 0; sbp++) {
		cm = xbd_dequeue_cm(sc, XBD_Q_FREE);
		if (cm == NULL) {
			mtx_unlock(&sc->xbd_io_lock);
			device_printf(sc->xbd_dev, "dump: no more commands?\n");
			return (EBUSY);
		}

		if (gnttab_alloc_grant_references(sc->xbd_max_request_segments,
		    &cm->cm_gref_head) != 0) {
			xbd_free_command(cm);
			mtx_unlock(&sc->xbd_io_lock);
			device_printf(sc->xbd_dev, "no more grant allocs?\n");
			return (EBUSY);
		}

		chunk = length > sc->xbd_max_request_size ?
		    sc->xbd_max_request_size : length;
		cm->cm_data = virtual;
		cm->cm_datalen = chunk;
		cm->cm_operation = BLKIF_OP_WRITE;
		cm->cm_sector_number = offset / dp->d_sectorsize;
		cm->cm_complete = xbd_dump_complete;

		xbd_enqueue_cm(cm, XBD_Q_READY);

		length -= chunk;
		offset += chunk;
		virtual = (char *) virtual + chunk;
	}

	/* Tell DOM0 to do the I/O */
	xbd_startio(sc);
	mtx_unlock(&sc->xbd_io_lock);

	/* Poll for the completion. */
	xbd_quiesce(sc);	/* All quiet on the eastern front */

	/* If there were any errors, bail out...
*/ while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { if (cm->cm_status != BLKIF_RSP_OKAY) { device_printf(sc->xbd_dev, "Dump I/O failed at sector %jd\n", cm->cm_sector_number); rc = EIO; } xbd_free_command(cm); } return (rc); } /*----------------------------- Disk Entrypoints -----------------------------*/ static int xbd_open(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) { printf("xb%d: not found", sc->xbd_unit); return (ENXIO); } sc->xbd_flags |= XBDF_OPEN; sc->xbd_users++; return (0); } static int xbd_close(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); sc->xbd_flags &= ~XBDF_OPEN; if (--(sc->xbd_users) == 0) { /* * Check whether we have been instructed to close. We will * have ignored this request initially, as the device was * still mounted. */ if (xenbus_get_otherend_state(sc->xbd_dev) == XenbusStateClosing) xbd_closing(sc->xbd_dev); } return (0); } static int xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); return (ENOTTY); } /* * Read/write routine for a buffer. Finds the proper unit, place it on * the sortq and kick the controller. */ static void xbd_strategy(struct bio *bp) { struct xbd_softc *sc = bp->bio_disk->d_drv1; /* bogus disk? 
*/ if (sc == NULL) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); return; } /* * Place it in the queue of disk activities for this disk */ mtx_lock(&sc->xbd_io_lock); xbd_enqueue_bio(sc, bp); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); return; } /*------------------------------ Ring Management -----------------------------*/ static int xbd_alloc_ring(struct xbd_softc *sc) { blkif_sring_t *sring; uintptr_t sring_page_addr; int error; int i; sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sring == NULL) { xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); return (ENOMEM); } SHARED_RING_INIT(sring); FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); for (i = 0, sring_page_addr = (uintptr_t)sring; i < sc->xbd_ring_pages; i++, sring_page_addr += PAGE_SIZE) { error = xenbus_grant_ring(sc->xbd_dev, (vtophys(sring_page_addr) >> PAGE_SHIFT), &sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "granting ring_ref(%d)", i); return (error); } } if (sc->xbd_ring_pages == 1) { error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), "ring-ref", "%u", sc->xbd_ring_ref[0]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-ref", xenbus_get_node(sc->xbd_dev)); return (error); } } else { for (i = 0; i < sc->xbd_ring_pages; i++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i); error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), ring_ref_name, "%u", sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/%s", xenbus_get_node(sc->xbd_dev), ring_ref_name); return (error); } } } error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "xen_intr_alloc_and_bind_local_port failed"); return 
(error);
	}

	return (0);
}

/*
 * Release the shared ring.
 *
 * Does nothing when no ring is allocated.  Otherwise ends foreign access on
 * every valid ring grant reference, marks each reference invalid, frees the
 * ring memory and clears the ring pointer.
 */
static void
xbd_free_ring(struct xbd_softc *sc)
{
	int i;

	if (sc->xbd_ring.sring == NULL)
		return;

	for (i = 0; i < sc->xbd_ring_pages; i++) {
		if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) {
			gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]);
			sc->xbd_ring_ref[i] = GRANT_REF_INVALID;
		}
	}
	free(sc->xbd_ring.sring, M_XENBLOCKFRONT);
	sc->xbd_ring.sring = NULL;
}

/*-------------------------- Initialization/Teardown -------------------------*/
/*
 * Format the feature flags set in sc->xbd_flags into `features' (capacity
 * `len') as a ", "-separated list: "flush" for XBDF_FLUSH and
 * "write_barrier" for XBDF_BARRIER.  Returns the value of sbuf_len() for the
 * finished fixed-length sbuf.
 */
static int
xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
{
	struct sbuf sb;
	int feature_cnt;

	sbuf_new(&sb, features, len, SBUF_FIXEDLEN);

	feature_cnt = 0;
	if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
		sbuf_printf(&sb, "flush");
		feature_cnt++;
	}

	if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
		/* Separator only when something was already emitted. */
		if (feature_cnt != 0)
			sbuf_printf(&sb, ", ");
		sbuf_printf(&sb, "write_barrier");
		feature_cnt++;
	}

	(void) sbuf_finish(&sb);
	return (sbuf_len(&sb));
}

static int
xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
{
	char features[80];
	struct xbd_softc *sc = arg1;
	int error;
	int len;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	len = xbd_feature_string(sc, features, sizeof(features));

	/* len is -1 on error, which will make the SYSCTL_OUT a no-op.
*/
	return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
}

/*
 * Attach the per-device sysctl nodes.
 *
 * Registers read-only entries for the negotiated limits (max_requests,
 * max_request_segments, max_request_size, ring_pages) and a read-only string
 * entry "features" served by xbd_sysctl_features().  Returns silently when
 * the device has no sysctl context or tree.
 */
static void
xbd_setup_sysctl(struct xbd_softc *xbd)
{
	struct sysctl_ctx_list *sysctl_ctx = NULL;
	struct sysctl_oid *sysctl_tree = NULL;
	struct sysctl_oid_list *children;

	sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
	if (sysctl_ctx == NULL)
		return;

	sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev);
	if (sysctl_tree == NULL)
		return;

	children = SYSCTL_CHILDREN(sysctl_tree);
	/* NOTE(review): this entry passes -1 where its siblings pass 0. */
	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
	    "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
	    "maximum outstanding requests (negotiated)");

	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
	    "max_request_segments", CTLFLAG_RD,
	    &xbd->xbd_max_request_segments, 0,
	    "maximum number of pages per requests (negotiated)");

	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
	    "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
	    "maximum size in bytes of a request (negotiated)");

	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
	    "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
	    "communication channel pages (negotiated)");

	SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
	    "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
	    xbd_sysctl_features, "A", "protocol features (negotiated)");
}

/*
 * Translate Linux major/minor to an appropriate name and unit
 * number. For HVM guests, this allows us to use the same drive names
 * with blkfront as the emulated drives, easing transition slightly.
*/
/*
 * Decoding rules, in order:
 *  - bit 28 of vdevice set: name is "xbd", unit is bits 8..27 of vdevice;
 *  - otherwise major = vdevice >> 8 and minor = vdevice & 0xff; a table hit
 *    on major yields that entry's name and unit = base + (minor >> shift);
 *  - no table hit: name is "xbd", unit = minor >> 4.
 * Results are stored through `unit' and `name'.
 */
static void
xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
{
	/* Table is terminated by the entry whose major is 0. */
	static struct vdev_info {
		int major;
		int shift;
		int base;
		const char *name;
	} info[] = {
		{3, 6, 0, "ada"}, /* ide0 */
		{22, 6, 2, "ada"}, /* ide1 */
		{33, 6, 4, "ada"}, /* ide2 */
		{34, 6, 6, "ada"}, /* ide3 */
		{56, 6, 8, "ada"}, /* ide4 */
		{57, 6, 10, "ada"}, /* ide5 */
		{88, 6, 12, "ada"}, /* ide6 */
		{89, 6, 14, "ada"}, /* ide7 */
		{90, 6, 16, "ada"}, /* ide8 */
		{91, 6, 18, "ada"}, /* ide9 */

		{8, 4, 0, "da"}, /* scsi disk0 */
		{65, 4, 16, "da"}, /* scsi disk1 */
		{66, 4, 32, "da"}, /* scsi disk2 */
		{67, 4, 48, "da"}, /* scsi disk3 */
		{68, 4, 64, "da"}, /* scsi disk4 */
		{69, 4, 80, "da"}, /* scsi disk5 */
		{70, 4, 96, "da"}, /* scsi disk6 */
		{71, 4, 112, "da"}, /* scsi disk7 */
		{128, 4, 128, "da"}, /* scsi disk8 */
		{129, 4, 144, "da"}, /* scsi disk9 */
		{130, 4, 160, "da"}, /* scsi disk10 */
		{131, 4, 176, "da"}, /* scsi disk11 */
		{132, 4, 192, "da"}, /* scsi disk12 */
		{133, 4, 208, "da"}, /* scsi disk13 */
		{134, 4, 224, "da"}, /* scsi disk14 */
		{135, 4, 240, "da"}, /* scsi disk15 */

		{202, 4, 0, "xbd"}, /* xbd */

		{0, 0, 0, NULL},
	};
	int major = vdevice >> 8;
	int minor = vdevice & 0xff;
	int i;

	if (vdevice & (1 << 28)) {
		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
		*name = "xbd";
		return;
	}

	for (i = 0; info[i].major; i++) {
		if (info[i].major == major) {
			*unit = info[i].base + (minor >> info[i].shift);
			*name = info[i].name;
			return;
		}
	}

	*unit = minor >> 4;
	*name = "xbd";
}

int
xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
    int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
	char features[80];
	int unit, error = 0;
	const char *name;

	xbd_vdevice_to_unit(vdevice, &unit, &name);

	sc->xbd_unit = unit;

	if (strcmp(name, "xbd") != 0)
		device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);

	if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
		device_printf(sc->xbd_dev, "features: %s\n",
		    features);
	}

	sc->xbd_disk = disk_alloc();
	sc->xbd_disk->d_unit = sc->xbd_unit;
sc->xbd_disk->d_open = xbd_open;
	sc->xbd_disk->d_close = xbd_close;
	sc->xbd_disk->d_ioctl = xbd_ioctl;
	sc->xbd_disk->d_strategy = xbd_strategy;
	sc->xbd_disk->d_dump = xbd_dump;
	sc->xbd_disk->d_name = name;
	sc->xbd_disk->d_drv1 = sc;
	sc->xbd_disk->d_sectorsize = sector_size;

	sc->xbd_disk->d_mediasize = sectors * sector_size;
	sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
	sc->xbd_disk->d_flags = 0;
	/* Either negotiated feature is enough to advertise cache flushing. */
	if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
		sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
		device_printf(sc->xbd_dev,
		    "synchronize cache commands enabled.\n");
	}
	disk_create(sc->xbd_disk, DISK_VERSION);

	return error;
}

/*
 * Tear down the device channel.
 *
 * Under the I/O lock the state is set to XBD_STATE_DISCONNECTED.  The ring
 * is then freed, and, when a shadow array exists, each command's grant
 * reference array, indirection pages (after ending foreign access on their
 * references) and DMA map are released, followed by the shadow array, the
 * DMA tag and a reset of the free/ready/complete queues.  Finally the
 * interrupt handle is unbound.
 */
static void
xbd_free(struct xbd_softc *sc)
{
	int i;

	/* Prevent new requests being issued until we fix things up. */
	mtx_lock(&sc->xbd_io_lock);
	sc->xbd_state = XBD_STATE_DISCONNECTED;
	mtx_unlock(&sc->xbd_io_lock);

	/* Free resources associated with old device channel. */
	xbd_free_ring(sc);
	if (sc->xbd_shadow) {

		for (i = 0; i < sc->xbd_max_requests; i++) {
			struct xbd_command *cm;

			cm = &sc->xbd_shadow[i];
			if (cm->cm_sg_refs != NULL) {
				free(cm->cm_sg_refs, M_XENBLOCKFRONT);
				cm->cm_sg_refs = NULL;
			}

			if (cm->cm_indirectionpages != NULL) {
				gnttab_end_foreign_access_references(
				    sc->xbd_max_request_indirectpages,
				    &cm->cm_indirectionrefs[0]);
				contigfree(cm->cm_indirectionpages, PAGE_SIZE *
				    sc->xbd_max_request_indirectpages,
				    M_XENBLOCKFRONT);
				cm->cm_indirectionpages = NULL;
			}

			bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
		}
		free(sc->xbd_shadow, M_XENBLOCKFRONT);
		sc->xbd_shadow = NULL;

		bus_dma_tag_destroy(sc->xbd_io_dmat);

		xbd_initq_cm(sc, XBD_Q_FREE);
		xbd_initq_cm(sc, XBD_Q_READY);
		xbd_initq_cm(sc, XBD_Q_COMPLETE);
	}

	xen_intr_unbind(&sc->xen_intr_handle);

}

/*--------------------------- State Change Handlers --------------------------*/
static void
xbd_initialize(struct xbd_softc *sc)
{
	const char *otherend_path;
	const char *node_path;
	uint32_t max_ring_page_order;
	int error;

	if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) {
		/*
Initialization has already been performed. */ return; } /* * Protocol defaults valid even if negotiation for a * setting fails. */ max_ring_page_order = 0; sc->xbd_ring_pages = 1; /* * Protocol negotiation. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated back-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. */ otherend_path = xenbus_get_otherend_path(sc->xbd_dev); node_path = xenbus_get_node(sc->xbd_dev); /* Support both backend schemes for relaying ring page limits. */ (void)xs_scanf(XST_NIL, otherend_path, "max-ring-page-order", NULL, "%" PRIu32, &max_ring_page_order); sc->xbd_ring_pages = 1 << max_ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "max-ring-pages", NULL, "%" PRIu32, &sc->xbd_ring_pages); if (sc->xbd_ring_pages < 1) sc->xbd_ring_pages = 1; if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "limited to front-end limit of %u.\n", sc->xbd_ring_pages, XBD_MAX_RING_PAGES); sc->xbd_ring_pages = XBD_MAX_RING_PAGES; } if (powerof2(sc->xbd_ring_pages) == 0) { uint32_t new_page_limit; new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "is not a power of 2. Limited to %u.\n", sc->xbd_ring_pages, new_page_limit); sc->xbd_ring_pages = new_page_limit; } sc->xbd_max_requests = BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { device_printf(sc->xbd_dev, "Back-end specified max_requests of %u " "limited to front-end limit of %zu.\n", sc->xbd_max_requests, XBD_MAX_REQUESTS); sc->xbd_max_requests = XBD_MAX_REQUESTS; } if (xbd_alloc_ring(sc) != 0) return; /* Support both backend schemes for relaying ring page limits. 
*/ if (sc->xbd_ring_pages > 1) { error = xs_printf(XST_NIL, node_path, "num-ring-pages","%u", sc->xbd_ring_pages); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/num-ring-pages", node_path); return; } error = xs_printf(XST_NIL, node_path, "ring-page-order", "%u", fls(sc->xbd_ring_pages) - 1); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-page-order", node_path); return; } } error = xs_printf(XST_NIL, node_path, "event-channel", "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/event-channel", node_path); return; } error = xs_printf(XST_NIL, node_path, "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/protocol", node_path); return; } xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); } /* * Invoked when the backend is finally 'ready' (and has published * the details about the physical device - #sectors, size, etc). */ static void xbd_connect(struct xbd_softc *sc) { device_t dev = sc->xbd_dev; unsigned long sectors, sector_size; unsigned int binfo; int err, feature_barrier, feature_flush; int i, j; if (sc->xbd_state == XBD_STATE_CONNECTED || sc->xbd_state == XBD_STATE_SUSPENDED) return; DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "sectors", "%lu", §ors, "info", "%u", &binfo, "sector-size", "%lu", §or_size, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading backend fields at %s", xenbus_get_otherend_path(dev)); return; } err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-barrier", "%lu", &feature_barrier, NULL); if (err == 0 && feature_barrier != 0) sc->xbd_flags |= XBDF_BARRIER; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-flush-cache", "%lu", &feature_flush, NULL); if (err == 0 && feature_flush != 0) sc->xbd_flags |= XBDF_FLUSH; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), 
"feature-max-indirect-segments", "%" PRIu32, &sc->xbd_max_request_segments, NULL); if ((err != 0) || (xbd_enable_indirect == 0)) sc->xbd_max_request_segments = 0; if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS) sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS; if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS)) sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS); sc->xbd_max_request_indirectpages = XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments); if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); /* Allocate datastructures based on negotiated values. */ err = bus_dma_tag_create( bus_get_dma_tag(sc->xbd_dev), /* parent */ 512, PAGE_SIZE, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->xbd_max_request_size, sc->xbd_max_request_segments, PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->xbd_io_lock, /* lockarg */ &sc->xbd_io_dmat); if (err != 0) { xenbus_dev_fatal(sc->xbd_dev, err, "Cannot allocate parent DMA tag\n"); return; } /* Per-transaction data allocation. 
*/ sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sc->xbd_shadow == NULL) { bus_dma_tag_destroy(sc->xbd_io_dmat); xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "Cannot allocate request structures\n"); return; } for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; void * indirectpages; cm = &sc->xbd_shadow[i]; cm->cm_sg_refs = malloc( sizeof(grant_ref_t) * sc->xbd_max_request_segments, M_XENBLOCKFRONT, M_NOWAIT); if (cm->cm_sg_refs == NULL) break; cm->cm_id = i; cm->cm_flags = XBDCF_INITIALIZER; cm->cm_sc = sc; if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) break; if (sc->xbd_max_request_indirectpages > 0) { indirectpages = contigmalloc( PAGE_SIZE * sc->xbd_max_request_indirectpages, M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0); } else { indirectpages = NULL; } for (j = 0; j < sc->xbd_max_request_indirectpages; j++) { if (gnttab_grant_foreign_access( xenbus_get_otherend_id(sc->xbd_dev), (vtophys(indirectpages) >> PAGE_SHIFT) + j, 1 /* grant read-only access */, &cm->cm_indirectionrefs[j])) break; } if (j < sc->xbd_max_request_indirectpages) break; cm->cm_indirectionpages = indirectpages; xbd_free_command(cm); } if (sc->xbd_disk == NULL) { device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), device_get_desc(dev), xenbus_get_node(dev)); bus_print_child_footer(device_get_parent(dev), dev); xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, sector_size); } (void)xenbus_set_state(dev, XenbusStateConnected); /* Kick pending requests. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_CONNECTED; xbd_startio(sc); sc->xbd_flags |= XBDF_READY; mtx_unlock(&sc->xbd_io_lock); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once this is done, we can switch to Closed in * acknowledgement. 
*/
static void
xbd_closing(device_t dev)
{
	struct xbd_softc *sc = device_get_softc(dev);

	xenbus_set_state(dev, XenbusStateClosing);

	DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev));

	/* Destroy the disk at most once. */
	if (sc->xbd_disk != NULL) {
		disk_destroy(sc->xbd_disk);
		sc->xbd_disk = NULL;
	}

	xenbus_set_state(dev, XenbusStateClosed);
}

/*---------------------------- NewBus Entrypoints ----------------------------*/
/*
 * Probe entry point.
 *
 * Accepts only xenbus devices of type "vbd".  In an HVM domain the probe is
 * refused when xen_disable_pv_disks is non-zero, when the "device-type" node
 * cannot be read, or when that node starts with "cdrom".  On success the
 * description is set to "Virtual Block Device" and the device is quieted.
 * Returns 0 on a match and ENXIO otherwise.
 */
static int
xbd_probe(device_t dev)
{
	if (strcmp(xenbus_get_type(dev), "vbd") != 0)
		return (ENXIO);

	if (xen_hvm_domain() && xen_disable_pv_disks != 0)
		return (ENXIO);

	if (xen_hvm_domain()) {
		int error;
		char *type;

		/*
		 * When running in an HVM domain, IDE disk emulation is
		 * disabled early in boot so that native drivers will
		 * not see emulated hardware.  However, CDROM device
		 * emulation cannot be disabled.
		 *
		 * Through use of FreeBSD's vm_guest and xen_hvm_domain()
		 * APIs, we could modify the native CDROM driver to fail its
		 * probe when running under Xen.  Unfortunately, the PV
		 * CDROM support in XenServer (up through at least version
		 * 6.2) isn't functional, so we instead rely on the emulated
		 * CDROM instance, and fail to attach the PV one here in
		 * the blkfront driver.
		 */
		error = xs_read(XST_NIL, xenbus_get_node(dev),
		    "device-type", NULL, (void **) &type);
		if (error)
			return (ENXIO);

		/* The string from xs_read() is freed on both paths. */
		if (strncmp(type, "cdrom", 5) == 0) {
			free(type, M_XENSTORE);
			return (ENXIO);
		}
		free(type, M_XENSTORE);
	}

	device_set_desc(dev, "Virtual Block Device");
	device_quiet(dev);
	return (0);
}

/*
 * Setup supplies the backend dir, virtual device.  We place an event
 * channel and shared frame entries.  We watch backend to wait if it's
 * ok.
 */
static int
xbd_attach(device_t dev)
{
	struct xbd_softc *sc;
	const char *name;
	uint32_t vdevice;
	int error;
	int i;
	int unit;

	/* FIXME: Use dynamic device id if this is not set.
*/ error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device", NULL, "%" PRIu32, &vdevice); if (error) error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device-ext", NULL, "%" PRIu32, &vdevice); if (error) { xenbus_dev_fatal(dev, error, "reading virtual-device"); device_printf(dev, "Couldn't determine virtual device.\n"); return (error); } xbd_vdevice_to_unit(vdevice, &unit, &name); if (!strcmp(name, "xbd")) device_set_unit(dev, unit); sc = device_get_softc(dev); mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); xbd_initqs(sc); for (i = 0; i < XBD_MAX_RING_PAGES; i++) sc->xbd_ring_ref[i] = GRANT_REF_INVALID; sc->xbd_dev = dev; sc->xbd_vdevice = vdevice; sc->xbd_state = XBD_STATE_DISCONNECTED; xbd_setup_sysctl(sc); /* Wait for backend device to publish its protocol capabilities. */ xenbus_set_state(dev, XenbusStateInitialising); return (0); } static int xbd_detach(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); xbd_free(sc); mtx_destroy(&sc->xbd_io_lock); return 0; } static int xbd_suspend(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); int retval; int saved_state; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); saved_state = sc->xbd_state; sc->xbd_state = XBD_STATE_SUSPENDED; /* Wait for outstanding I/O to drain. */ retval = 0; while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { retval = EBUSY; break; } } mtx_unlock(&sc->xbd_io_lock); if (retval != 0) sc->xbd_state = saved_state; return (retval); } static int xbd_resume(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); xbd_free(sc); xbd_initialize(sc); return (0); } /** * Callback received when the backend's state changes. 
*/ static void xbd_backend_changed(device_t dev, XenbusState backend_state) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("backend_state=%d\n", backend_state); switch (backend_state) { case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateReconfigured: case XenbusStateReconfiguring: case XenbusStateClosed: break; case XenbusStateInitWait: case XenbusStateInitialised: xbd_initialize(sc); break; case XenbusStateConnected: xbd_initialize(sc); xbd_connect(sc); break; case XenbusStateClosing: if (sc->xbd_users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else xbd_closing(dev); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbd_probe), DEVMETHOD(device_attach, xbd_attach), DEVMETHOD(device_detach, xbd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbd_suspend), DEVMETHOD(device_resume, xbd_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), { 0, 0 } }; static driver_t xbd_driver = { "xbd", xbd_methods, sizeof(struct xbd_softc), }; devclass_t xbd_devclass; DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); Index: projects/clang370-import/sys/i386/i386/support.s =================================================================== --- projects/clang370-import/sys/i386/i386/support.s (revision 288925) +++ projects/clang370-import/sys/i386/i386/support.s (revision 288926) @@ -1,835 +1,834 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 * $FreeBSD$
 */

#include "opt_npx.h"

#include
#include
-#include
#include
#include

#include "assym.s"

#define IDXSHIFT 10

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 *
 * Stores zero over len bytes at buf: len/4 dword stores followed by
 * len%4 byte stores.
 */
ENTRY(bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret
END(bzero)

/*
 * Zero 4096 bytes starting at the address passed on the stack, four bytes
 * at a time with movnti, then issue an sfence.
 */
ENTRY(sse2_pagezero)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	%ecx,%eax
	addl	$4096,%eax		/* %eax = end address */
	xor	%ebx,%ebx
1:
	movnti	%ebx,(%ecx)
	addl	$4,%ecx
	cmpl	%ecx,%eax
	jne	1b
	sfence
	popl	%ebx
	ret
END(sse2_pagezero)

/*
 * Zero 1024 dwords starting at the address passed on the stack.  The
 * region is scanned with "repe scasl"; zeros are only stored (labels 2/3)
 * after a non-zero dword has been found, and scanning resumes afterwards
 * while dwords remain.
 */
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp),%edi
	movl	$1024,%ecx
	cld

	ALIGN_TEXT
1:
	xorl	%eax,%eax
	repe
	scasl
	jnz	2f

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	/* Step back to the non-zero dword that stopped the scan. */
	incl	%ecx
	subl	$4,%edi

	movl	%ecx,%edx
	cmpl	$16,%ecx

	jge	3f

	movl	%edi,%ebx
	andl	$0x3f,%ebx
	shrl	%ebx
	shrl	%ebx
	movl	$16,%ecx
	subl	%ebx,%ecx

3:
	subl	%ecx,%edx		/* %edx = dwords left after this run */
	rep
	stosl

	movl	%edx,%ecx
	testl	%edx,%edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret
END(i686_pagezero)

/* fillw(pat, base, cnt) */
/* Stores the 16-bit pattern cnt times starting at base. */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret
END(fillw)

/*
 * Byte-wise copy of the third argument's worth of bytes from the first
 * argument to the second; copies backwards when the regions overlap with
 * src < dst.
 */
ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* leave the direction flag clear */
	ret
END(bcopyb)

/*
 * bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Copies by 32-bit words plus a byte remainder; copies backwards when the
 * regions overlap with src < dst.
 */
ENTRY(bcopy)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	movl	8(%ebp),%esi
	movl	12(%ebp),%edi
	movl	16(%ebp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	16(%ebp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	16(%ebp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	popl	%ebp
	ret
END(bcopy)

/*
 * Note: memcpy does not support overlapping copies
 *
 * The destination pointer is loaded into %eax before the copy and is not
 * modified afterwards, so it is the value left in %eax at return.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
END(memcpy)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */

/*
 * copyout(from_kernel, to_user, len)  - MP SAFE
 *
 * Returns 0 in %eax on success, or EFAULT via copyout_fault.
 */
ENTRY(copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* clear the fault handler */
	ret
END(copyout)

	ALIGN_TEXT
/* Fault/validation-failure exit for copyout: unwind and return EFAULT. */
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 *
 * Returns 0 in %eax on success, or EFAULT via copyin_fault.
 */
ENTRY(copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

	movb	%cl,%al				/* keep low length bits for the tail */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret
END(copyin)

	ALIGN_TEXT
/* Fault/validation-failure exit for copyin: unwind and return EFAULT. */
copyin_fault:
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

/*
 * casueword.  Compare and set user word.  Returns -1 on fault,
 * 0 on non-faulting access.  The current value is in *oldp.
*/
ALTENTRY(casueword32)
ENTRY(casueword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	16(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	12(%esp),%edx			/* oldp */
	movl	%eax,(%edx)			/* *oldp = value seen by cmpxchg */
	xorl	%eax,%eax
	ret
END(casueword32)
END(casueword)

/*
 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
 * memory.
 *
 * fueword stores the fetched word through its second argument and
 * returns 0; faults and out-of-range addresses exit through fusufault.
 */

ALTENTRY(fueword32)
ENTRY(fueword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	movl	8(%esp),%edx
	movl	%eax,(%edx)
	xorl	%eax,%eax
	ret
END(fueword32)
END(fueword)

/*
 * fuswintr() and suswintr() are specialized variants of fuword16() and
 * suword16(), respectively.  They are called from the profiling code,
 * potentially at interrupt time.  If they fail, that's okay; good things
 * will happen later.  They always fail for now, until the trap code is
 * able to deal with this.
*/
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret
END(suswintr)
END(fuswintr)

/* Fetch a zero-extended 16-bit word from the user address in arg 1. */
ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
END(fuword16)

/* Fetch a zero-extended byte from the user address in arg 1. */
ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
END(fubyte)

	ALIGN_TEXT
/*
 * Shared failure exit for the fetch/store routines that install it as
 * their fault handler: clear pcb_onfault and return -1.
 */
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax				/* 0 - 1 = -1 */
	ret

/*
 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
 * All these functions are MPSAFE.
 *
 * Each returns 0 in %eax on success and exits through fusufault otherwise.
 */

ALTENTRY(suword32)
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
END(suword32)
END(suword)

ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
END(suword16)

ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
END(subyte)

/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
*/
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* also clamp the saved maxlen */
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* maxlen - remaining count */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
END(copyinstr)

/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 * Same copy loop as copyinstr, without the user-address check or fault
 * handler; returns 0 or ENAMETOOLONG.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
END(copystr)

/*
 * Compare two regions of the length given in arg 3; %eax is 0 when they
 * are equal and non-zero when they differ (produced by setne).
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
1:
	setne	%al
	movsbl	%al,%eax
	popl	%esi
	popl	%edi
	ret
END(bcmp)

/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%gs
	movl	%eax,%ss
	movl	$KPSEL,%eax
	movl	%eax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	MEXITCOUNT
	lret
END(lgdt)

/* ssdtosd(*ssdp,*sdp) */
/* Repacks the fields read through ssdp into the two words written to sdp. */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
END(ssdtosd)

/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6
	ret
END(reset_dbregs)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

/* Save %ebx, %esp, %ebp, %esi, %edi and the return address; return 0. */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

/* Restore the registers saved by setjmp and return 1 at its call site. */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)

/*
 * Support for reading MSRs in the safe manner.
*/
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
/* Returns 0 after storing %edx:%eax to *data, or EFAULT via msr_onfault. */
	movl	PCPU(CURPCB),%ecx
	movl	$msr_onfault,PCB_ONFAULT(%ecx)

	movl	4(%esp),%ecx			/* %ecx = msr */
	rdmsr
	movl	8(%esp),%ecx			/* %ecx = data */
	movl	%eax,(%ecx)
	movl	%edx,4(%ecx)
	xorl	%eax,%eax

	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)

	ret

/*
 * Support for writing MSRs in the safe manner.
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
/* Returns 0 after the write, or EFAULT via msr_onfault. */
	movl	PCPU(CURPCB),%ecx
	movl	$msr_onfault,PCB_ONFAULT(%ecx)

	movl	4(%esp),%ecx			/* %ecx = msr */
	movl	8(%esp),%eax			/* low half of data */
	movl	12(%esp),%edx			/* high half of data */
	wrmsr
	xorl	%eax,%eax

	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)

	ret

/*
 * MSR operations fault handler
 *
 * Clears pcb_onfault and returns EFAULT to the caller of the *_safe routine.
 */
	ALIGN_TEXT
msr_onfault:
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	$EFAULT,%eax
	ret
Index: projects/clang370-import/sys/i386/include/intr_machdep.h
===================================================================
--- projects/clang370-import/sys/i386/include/intr_machdep.h (revision 288925)
+++ projects/clang370-import/sys/i386/include/intr_machdep.h (revision 288926)
@@ -1,178 +1,179 @@
/*-
 * Copyright (c) 2003 John Baldwin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. * IRQ values from 256 to 767 are used by MSI. When running under the Xen * Hypervisor, IRQ values from 768 to 4863 are available for binding to * event channel events. We leave 255 unused to avoid confusion since 255 is * used in PCI to indicate an invalid IRQ. 
*/
/* Size and first IRQ number of the MSI range. */
#define NUM_MSI_INTS 512
#define FIRST_MSI_INT 256
#ifdef XENHVM
#include
+#include
/* Event-channel IRQs are numbered directly after the MSI range. */
#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS
#define FIRST_EVTCHN_INT \
    (FIRST_MSI_INT + NUM_MSI_INTS)
#define LAST_EVTCHN_INT \
    (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
#else /* !XENHVM */
#define NUM_EVTCHN_INTS 0
#endif
/* One past the highest IRQ number: all ranges above laid end to end. */
#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)

/*
 * Default base address for MSI messages on x86 platforms.
 */
#define MSI_INTEL_ADDR_BASE 0xfee00000

/*
 * - 1 ??? dummy counter.
 * - 2 counters for each I/O interrupt.
 * - 1 counter for each CPU for lapic timer.
 * - 9 counters for each CPU for IPI counters for SMP.
 */
#ifdef SMP
#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 9) * MAXCPU)
#else
#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1)
#endif

#ifndef LOCORE

/* Signature of an interrupt entry point. */
typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);

/* IDTVEC(foo) names the symbol Xfoo. */
#define IDTVEC(name) __CONCAT(X,name)

struct intsrc;

/*
 * Methods that a PIC provides to mask/unmask a given interrupt source,
 * "turn on" the interrupt on the CPU side by setting up an IDT entry, and
 * return the vector associated with this source.
 */
struct pic {
	void (*pic_enable_source)(struct intsrc *);
	void (*pic_disable_source)(struct intsrc *, int);	/* int: PIC_EOI or PIC_NO_EOI */
	void (*pic_eoi_source)(struct intsrc *);
	void (*pic_enable_intr)(struct intsrc *);
	void (*pic_disable_intr)(struct intsrc *);
	int (*pic_vector)(struct intsrc *);
	int (*pic_source_pending)(struct intsrc *);
	void (*pic_suspend)(struct pic *);
	void (*pic_resume)(struct pic *, bool suspend_cancelled);
	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
	    enum intr_polarity);
	int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
	void (*pic_reprogram_pin)(struct intsrc *);
	TAILQ_ENTRY(pic) pics;			/* linkage on a list of PICs */
};

/* Flags for pic_disable_source() */
enum {
	PIC_EOI,
	PIC_NO_EOI,
};

/*
 * An interrupt source.  The upper-layer code uses the PIC methods to
 * control a given source.
The lower-layer PIC drivers can store additional
 * private data in a given interrupt source such as an interrupt pin number
 * or an I/O APIC pointer.
 */
struct intsrc {
	struct pic *is_pic;		/* PIC whose methods control this source */
	struct intr_event *is_event;
	u_long *is_count;
	u_long *is_straycount;
	u_int is_index;
	u_int is_handlers;
};

struct trapframe;

extern struct mtx icu_lock;
extern int elcr_found;

#ifndef DEV_ATPIC
void	atpic_reset(void);
#endif
/* XXX: The elcr_* prototypes probably belong somewhere else. */
int	elcr_probe(void);
enum intr_trigger elcr_read_trigger(u_int irq);
void	elcr_resume(void);
void	elcr_write_trigger(u_int irq, enum intr_trigger trigger);
#ifdef SMP
void	intr_add_cpu(u_int cpu);
#endif
/* Generic interrupt-source management. */
int	intr_add_handler(const char *name, int vector, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
#ifdef SMP
int	intr_bind(u_int vector, u_char cpu);
#endif
int	intr_config_intr(int vector, enum intr_trigger trig,
    enum intr_polarity pol);
int	intr_describe(u_int vector, void *ih, const char *descr);
void	intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
u_int	intr_next_cpu(void);
struct intsrc *intr_lookup_source(int vector);
int	intr_register_pic(struct pic *pic);
int	intr_register_source(struct intsrc *isrc);
int	intr_remove_handler(void *cookie);
void	intr_resume(bool suspend_cancelled);
void	intr_suspend(void);
void	intr_reprogram(void);
void	intrcnt_add(const char *name, u_long **countp);
void	nexus_add_irq(u_long irq);
/* MSI and MSI-X allocation. */
int	msi_alloc(device_t dev, int count, int maxcount, int *irqs);
void	msi_init(void);
int	msi_map(int irq, uint64_t *addr, uint32_t *data);
int	msi_release(int* irqs, int count);
int	msix_alloc(device_t dev, int *irq);
int	msix_release(int irq);

#endif	/* !LOCORE */
#endif	/* _KERNEL */
#endif	/* !__MACHINE_INTR_MACHDEP_H__ */
Index: projects/clang370-import/sys/kern/sys_process.c
===================================================================
--- projects/clang370-import/sys/kern/sys_process.c
(revision 288925) +++ projects/clang370-import/sys/kern/sys_process.c (revision 288926) @@ -1,1329 +1,1334 @@ /*- * Copyright (c) 1994, Sean Eric Fagan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

#include
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include

#include

#include
#include
#include
#include
#include
#include
#include
#include

#ifdef COMPAT_FREEBSD32
#include
#include

/*
 * 32-bit counterpart of struct ptrace_io_desc; the offset, address and
 * length fields are 32-bit integers.
 */
struct ptrace_io_desc32 {
	int		piod_op;
	uint32_t	piod_offs;
	uint32_t	piod_addr;
	uint32_t	piod_len;
};

/*
 * 32-bit counterpart of struct ptrace_vm_entry; pve_path carries the
 * user buffer address as a 32-bit integer.
 */
struct ptrace_vm_entry32 {
	int		pve_entry;
	int		pve_timestamp;
	uint32_t	pve_start;
	uint32_t	pve_end;
	uint32_t	pve_offset;
	u_int		pve_prot;
	u_int		pve_pathlen;
	int32_t		pve_fileid;
	u_int		pve_fsid;
	uint32_t	pve_path;
};

/* 32-bit counterpart of struct ptrace_lwpinfo, using struct siginfo32. */
struct ptrace_lwpinfo32 {
	lwpid_t	pl_lwpid;	/* LWP described. */
	int	pl_event;	/* Event that stopped the LWP. */
	int	pl_flags;	/* LWP flags. */
	sigset_t	pl_sigmask;	/* LWP signal mask */
	sigset_t	pl_siglist;	/* LWP pending signal */
	struct siginfo32 pl_siginfo;	/* siginfo for signal */
	char	pl_tdname[MAXCOMLEN + 1];	/* LWP name. */
	int	pl_child_pid;		/* New child pid */
	u_int		pl_syscall_code;
	u_int		pl_syscall_narg;
};

#endif

/*
 * Functions implemented using PROC_ACTION():
 *
 * proc_read_regs(proc, regs)
 *	Get the current user-visible register set from the process
 *	and copy it into the regs structure ().
 *	The process is stopped at the time read_regs is called.
 *
 * proc_write_regs(proc, regs)
 *	Update the current register set from the passed in regs
 *	structure.  Take care to avoid clobbering special CPU
 *	registers or privileged bits in the PSL.
 *	Depending on the architecture this may have fix-up work to do,
 *	especially if the IAR or PCW are modified.
 *	The process is stopped at the time write_regs is called.
 *
 * proc_read_fpregs, proc_write_fpregs
 *	deal with the floating point register set, otherwise as above.
 *
 * proc_read_dbregs, proc_write_dbregs
 *	deal with the processor debug register set, otherwise as above.
 *
 * proc_sstep(proc)
 *	Arrange for the process to trap after executing a single instruction.
*/ #define PROC_ACTION(action) do { \ int error; \ \ PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); \ if ((td->td_proc->p_flag & P_INMEM) == 0) \ error = EIO; \ else \ error = (action); \ return (error); \ } while(0) int proc_read_regs(struct thread *td, struct reg *regs) { PROC_ACTION(fill_regs(td, regs)); } int proc_write_regs(struct thread *td, struct reg *regs) { PROC_ACTION(set_regs(td, regs)); } int proc_read_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(fill_dbregs(td, dbregs)); } int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(set_dbregs(td, dbregs)); } /* * Ptrace doesn't support fpregs at all, and there are no security holes * or translations for fpregs, so we can just copy them. */ int proc_read_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(fill_fpregs(td, fpregs)); } int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(set_fpregs(td, fpregs)); } #ifdef COMPAT_FREEBSD32 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. 
*/ int proc_read_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(fill_regs32(td, regs32)); } int proc_write_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(set_regs32(td, regs32)); } int proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(fill_dbregs32(td, dbregs32)); } int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(set_dbregs32(td, dbregs32)); } int proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(fill_fpregs32(td, fpregs32)); } int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(set_fpregs32(td, fpregs32)); } #endif int proc_sstep(struct thread *td) { PROC_ACTION(ptrace_single_step(td)); } int proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; /* * Assert that someone has locked this vmspace. (Should be * curthread but we can't assert that.) This keeps the process * from exiting out from under us until this operation completes. */ KASSERT(p->p_lock >= 1, ("%s: process %p (pid %d) not held", __func__, p, p->p_pid)); /* * The map we want... */ map = &p->p_vmspace->vm_map; /* * If we are writing, then we request vm_fault() to create a private * copy of each page. Since these copies will not be writeable by the * process, we must explicity request that they be dirtied. */ writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_offset_t uva; u_int len; vm_page_t m; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. 
*/ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault and hold the page on behalf of the process. */ error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EFAULT; break; } /* * Now do the i/o move. */ error = uiomove_fromphys(&m, page_offset, len, uio); /* Make the I-cache coherent for breakpoints. */ if (writing && error == 0) { vm_map_lock_read(map); if (vm_map_check_protection(map, pageno, pageno + PAGE_SIZE, VM_PROT_EXECUTE)) vm_sync_icache(map, uva, len); vm_map_unlock_read(map); } /* * Release the page. */ vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); return (error); } static int ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve) { struct vattr vattr; vm_map_t map; vm_map_entry_t entry; vm_object_t obj, tobj, lobj; struct vmspace *vm; struct vnode *vp; char *freepath, *fullpath; u_int pathlen; int error, index; error = 0; obj = NULL; vm = vmspace_acquire_ref(p); map = &vm->vm_map; vm_map_lock_read(map); do { entry = map->header.next; index = 0; while (index < pve->pve_entry && entry != &map->header) { entry = entry->next; index++; } if (index != pve->pve_entry) { error = EINVAL; break; } while (entry != &map->header && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) { entry = entry->next; index++; } if (entry == &map->header) { error = ENOENT; break; } /* We got an entry. */ pve->pve_entry = index + 1; pve->pve_timestamp = map->timestamp; pve->pve_start = entry->start; pve->pve_end = entry->end - 1; pve->pve_offset = entry->offset; pve->pve_prot = entry->protection; /* Backing object's path needed? 
*/ if (pve->pve_pathlen == 0) break; pathlen = pve->pve_pathlen; pve->pve_pathlen = 0; obj = entry->object.vm_object; if (obj != NULL) VM_OBJECT_RLOCK(obj); } while (0); vm_map_unlock_read(map); vmspace_free(vm); pve->pve_fsid = VNOVAL; pve->pve_fileid = VNOVAL; if (error == 0 && obj != NULL) { lobj = obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; pve->pve_offset += tobj->backing_object_offset; } vp = vm_object_vnode(lobj); if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { freepath = NULL; fullpath = NULL; vn_fullpath(td, vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) { pve->pve_fileid = vattr.va_fileid; pve->pve_fsid = vattr.va_fsid; } vput(vp); if (fullpath != NULL) { pve->pve_pathlen = strlen(fullpath) + 1; if (pve->pve_pathlen <= pathlen) { error = copyout(fullpath, pve->pve_path, pve->pve_pathlen); } else error = ENAMETOOLONG; } if (freepath != NULL) free(freepath, M_TEMP); } } if (error == 0) CTR3(KTR_PTRACE, "PT_VM_ENTRY: pid %d, entry %d, start %p", p->p_pid, pve->pve_entry, pve->pve_start); return (error); } #ifdef COMPAT_FREEBSD32 static int ptrace_vm_entry32(struct thread *td, struct proc *p, struct ptrace_vm_entry32 *pve32) { struct ptrace_vm_entry pve; int error; pve.pve_entry = pve32->pve_entry; pve.pve_pathlen = pve32->pve_pathlen; pve.pve_path = (void *)(uintptr_t)pve32->pve_path; error = ptrace_vm_entry(td, p, &pve); if (error == 0) { pve32->pve_entry = pve.pve_entry; pve32->pve_timestamp = pve.pve_timestamp; pve32->pve_start = pve.pve_start; pve32->pve_end = pve.pve_end; pve32->pve_offset = pve.pve_offset; pve32->pve_prot = pve.pve_prot; pve32->pve_fileid = pve.pve_fileid; pve32->pve_fsid = pve.pve_fsid; } pve32->pve_pathlen = pve.pve_pathlen; return (error); } static void ptrace_lwpinfo_to32(const struct 
ptrace_lwpinfo *pl, struct ptrace_lwpinfo32 *pl32) { pl32->pl_lwpid = pl->pl_lwpid; pl32->pl_event = pl->pl_event; pl32->pl_flags = pl->pl_flags; pl32->pl_sigmask = pl->pl_sigmask; pl32->pl_siglist = pl->pl_siglist; siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo); strcpy(pl32->pl_tdname, pl->pl_tdname); pl32->pl_child_pid = pl->pl_child_pid; pl32->pl_syscall_code = pl->pl_syscall_code; pl32->pl_syscall_narg = pl->pl_syscall_narg; } #endif /* COMPAT_FREEBSD32 */ /* * Process debugging system call. */ #ifndef _SYS_SYSPROTO_H_ struct ptrace_args { int req; pid_t pid; caddr_t addr; int data; }; #endif #ifdef COMPAT_FREEBSD32 /* * This CPP subterfuge is to try and reduce the number of ifdefs in * the body of the code. * COPYIN(uap->addr, &r.reg, sizeof r.reg); * becomes either: * copyin(uap->addr, &r.reg, sizeof r.reg); * or * copyin(uap->addr, &r.reg32, sizeof r.reg32); * .. except this is done at runtime. */ #define COPYIN(u, k, s) wrap32 ? \ copyin(u, k ## 32, s ## 32) : \ copyin(u, k, s) #define COPYOUT(k, u, s) wrap32 ? \ copyout(k ## 32, u, s ## 32) : \ copyout(k, u, s) #else #define COPYIN(u, k, s) copyin(u, k, s) #define COPYOUT(k, u, s) copyout(k, u, s) #endif int sys_ptrace(struct thread *td, struct ptrace_args *uap) { /* * XXX this obfuscation is to reduce stack usage, but the register * structs may be too large to put on the stack anyway. 
*/
	union {
		struct ptrace_io_desc piod;
		struct ptrace_lwpinfo pl;
		struct ptrace_vm_entry pve;
		struct dbreg dbreg;
		struct fpreg fpreg;
		struct reg reg;
#ifdef COMPAT_FREEBSD32
		struct dbreg32 dbreg32;
		struct fpreg32 fpreg32;
		struct reg32 reg32;
		struct ptrace_io_desc32 piod32;
		struct ptrace_lwpinfo32 pl32;
		struct ptrace_vm_entry32 pve32;
#endif
	} r;
	void *addr;
	int error = 0;
#ifdef COMPAT_FREEBSD32
	int wrap32 = 0;

	/* A 32-bit caller exchanges the *32 structure layouts. */
	if (SV_CURPROC_FLAG(SV_ILP32))
		wrap32 = 1;
#endif
	AUDIT_ARG_PID(uap->pid);
	AUDIT_ARG_CMD(uap->req);
	AUDIT_ARG_VALUE(uap->data);
	/*
	 * Requests that exchange a structure use the kernel-side buffer "r";
	 * every other request passes the user address straight through.
	 */
	addr = &r;
	switch (uap->req) {
	case PT_GETREGS:
	case PT_GETFPREGS:
	case PT_GETDBREGS:
	case PT_LWPINFO:
		/* Output only: nothing to copy in. */
		break;
	case PT_SETREGS:
		error = COPYIN(uap->addr, &r.reg, sizeof r.reg);
		break;
	case PT_SETFPREGS:
		error = COPYIN(uap->addr, &r.fpreg, sizeof r.fpreg);
		break;
	case PT_SETDBREGS:
		error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg);
		break;
	case PT_IO:
		error = COPYIN(uap->addr, &r.piod, sizeof r.piod);
		break;
	case PT_VM_ENTRY:
		error = COPYIN(uap->addr, &r.pve, sizeof r.pve);
		break;
	default:
		addr = uap->addr;
		break;
	}
	if (error)
		return (error);

	error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data);
	if (error)
		return (error);

	/* Hand results back for the requests that produce a structure. */
	switch (uap->req) {
	case PT_VM_ENTRY:
		error = COPYOUT(&r.pve, uap->addr, sizeof r.pve);
		break;
	case PT_IO:
		error = COPYOUT(&r.piod, uap->addr, sizeof r.piod);
		break;
	case PT_GETREGS:
		error = COPYOUT(&r.reg, uap->addr, sizeof r.reg);
		break;
	case PT_GETFPREGS:
		error = COPYOUT(&r.fpreg, uap->addr, sizeof r.fpreg);
		break;
	case PT_GETDBREGS:
		error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg);
		break;
	case PT_LWPINFO:
		/*
		 * The caller chooses the length through "data"; r.pl and
		 * r.pl32 share the start of the union, so one plain copyout
		 * serves both layouts.
		 */
		error = copyout(&r.pl, uap->addr, uap->data);
		break;
	}

	return (error);
}
#undef COPYIN
#undef COPYOUT

#ifdef COMPAT_FREEBSD32
/*
 *   PROC_READ(regs, td2, addr);
 * becomes either:
 *   proc_read_regs(td2, addr);
 * or
 *   proc_read_regs32(td2, addr);
 * .. except this is done at runtime.
There is an additional * complication in that PROC_WRITE disallows 32 bit consumers * from writing to 64 bit address space targets. */ #define PROC_READ(w, t, a) wrap32 ? \ proc_read_ ## w ## 32(t, a) : \ proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) wrap32 ? \ (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \ proc_write_ ## w (t, a) #else #define PROC_READ(w, t, a) proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) proc_write_ ## w (t, a) #endif int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { struct iovec iov; struct uio uio; struct proc *curp, *p, *pp; struct thread *td2 = NULL, *td3; struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; int error, write, tmp, num; int proctree_locked = 0; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; struct ptrace_io_desc32 *piod32 = NULL; struct ptrace_lwpinfo32 *pl32 = NULL; struct ptrace_lwpinfo plr; #endif curp = td->td_proc; /* Lock proctree before locking the process. */ switch (req) { case PT_TRACE_ME: case PT_ATTACH: case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_FOLLOW_FORK: case PT_DETACH: sx_xlock(&proctree_lock); proctree_locked = 1; break; default: break; } write = 0; if (req == PT_TRACE_ME) { p = td->td_proc; PROC_LOCK(p); } else { if (pid <= PID_MAX) { if ((p = pfind(pid)) == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } } else { td2 = tdfind(pid, -1); if (td2 == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } p = td2->td_proc; tid = pid; pid = p->p_pid; } } AUDIT_ARG_PROCESS(p); if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_cansee(td, p)) != 0) goto fail; if ((error = p_candebug(td, p)) != 0) goto fail; /* * System processes can't be debugged. 
*/ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } if (tid == 0) { if ((p->p_flag & P_STOPPED_TRACE) != 0) { KASSERT(p->p_xthread != NULL, ("NULL p_xthread")); td2 = p->p_xthread; } else { td2 = FIRST_THREAD_IN_PROC(p); } tid = td2->td_tid; } #ifdef COMPAT_FREEBSD32 /* * Test if we're a 32 bit client and what the target is. * Set the wrap controls accordingly. */ if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32)) safe = 1; wrap32 = 1; } #endif /* * Permissions check */ switch (req) { case PT_TRACE_ME: /* Always legal. */ break; case PT_ATTACH: /* Self */ if (p->p_pid == td->td_proc->p_pid) { error = EINVAL; goto fail; } /* Already traced */ if (p->p_flag & P_TRACED) { error = EBUSY; goto fail; } /* Can't trace an ancestor if you're being traced. */ if (curp->p_flag & P_TRACED) { for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) { if (pp == p) { error = EINVAL; goto fail; } } } /* OK */ break; case PT_CLEARSTEP: /* Allow thread to clear single step for itself */ if (td->td_tid == tid) break; /* FALLTHROUGH */ default: /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != td->td_proc) { error = EBUSY; goto fail; } /* not currently stopped */ if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) == 0 || p->p_suspcount != p->p_numthreads || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } if ((p->p_flag & P_STOPPED_TRACE) == 0) { static int count = 0; if (count++ == 0) printf("P_STOPPED_TRACE not set.\n"); } /* OK */ break; } /* Keep this process around until we finish this request. 
*/ _PHOLD(p); #ifdef FIX_SSTEP /* * Single step fixup ala procfs */ FIX_SSTEP(td2); #endif /* * Actually do the requests */ td->td_retval[0] = 0; switch (req) { case PT_TRACE_ME: /* set my trace flag and "owner" so it can read/write me */ p->p_flag |= P_TRACED; if (p->p_flag & P_PPWAIT) p->p_flag |= P_PPTRACE; p->p_oppid = p->p_pptr->p_pid; CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid); break; case PT_ATTACH: /* security check done above */ /* * It would be nice if the tracing relationship was separate * from the parent relationship but that would require * another set of links in the proc struct or for "wait" * to scan the entire proc table. To make life easier, * we just re-parent the process we're trying to trace. * The old parent is remembered so we can put things back * on a "detach". */ p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; if (p->p_pptr != td->td_proc) { proc_reparent(p, td->td_proc); } data = SIGSTOP; CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid, p->p_oppid); goto sendsig; /* in PT_CONTINUE below */ case PT_CLEARSTEP: CTR2(KTR_PTRACE, "PT_CLEARSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_clear_single_step(td2); break; case PT_SETSTEP: CTR2(KTR_PTRACE, "PT_SETSTEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_single_step(td2); break; case PT_SUSPEND: CTR2(KTR_PTRACE, "PT_SUSPEND: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_SUSPEND; thread_lock(td2); td2->td_flags |= TDF_NEEDSUSPCHK; thread_unlock(td2); break; case PT_RESUME: CTR2(KTR_PTRACE, "PT_RESUME: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags &= ~TDB_SUSPEND; break; case PT_FOLLOW_FORK: CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid, p->p_flag & P_FOLLOWFORK ? "enabled" : "disabled", data ? 
"enabled" : "disabled"); if (data) p->p_flag |= P_FOLLOWFORK; else p->p_flag &= ~P_FOLLOWFORK; break; case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_DETACH: /* Zero means do not send any signal */ if (data < 0 || data > _SIG_MAXSIG) { error = EINVAL; break; } switch (req) { case PT_STEP: CTR2(KTR_PTRACE, "PT_STEP: tid %d (pid %d)", td2->td_tid, p->p_pid); error = ptrace_single_step(td2); if (error) goto out; break; case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); if (error) goto out; } switch (req) { case PT_TO_SCE: p->p_stops |= S_PT_SCE; - CTR2(KTR_PTRACE, - "PT_TO_SCE: pid %d, stops = %#x", p->p_pid, - p->p_stops); + CTR4(KTR_PTRACE, + "PT_TO_SCE: pid %d, stops = %#x, PC = %#lx, sig = %d", + p->p_pid, p->p_stops, + (u_long)(uintfptr_t)addr, data); break; case PT_TO_SCX: p->p_stops |= S_PT_SCX; - CTR2(KTR_PTRACE, - "PT_TO_SCX: pid %d, stops = %#x", p->p_pid, - p->p_stops); + CTR4(KTR_PTRACE, + "PT_TO_SCX: pid %d, stops = %#x, PC = %#lx, sig = %d", + p->p_pid, p->p_stops, + (u_long)(uintfptr_t)addr, data); break; case PT_SYSCALL: p->p_stops |= S_PT_SCE | S_PT_SCX; - CTR2(KTR_PTRACE, - "PT_SYSCALL: pid %d, stops = %#x", p->p_pid, - p->p_stops); + CTR4(KTR_PTRACE, + "PT_SYSCALL: pid %d, stops = %#x, PC = %#lx, sig = %d", + p->p_pid, p->p_stops, + (u_long)(uintfptr_t)addr, data); break; case PT_CONTINUE: - CTR1(KTR_PTRACE, - "PT_CONTINUE: pid %d", p->p_pid); + CTR3(KTR_PTRACE, + "PT_CONTINUE: pid %d, PC = %#lx, sig = %d", + p->p_pid, (u_long)(uintfptr_t)addr, data); break; } break; case PT_DETACH: /* * Reset the process parent. * * NB: This clears P_TRACED before reparenting * a detached process back to its original * parent. Otherwise the debugee will be set * as an orphan of the debugger. 
*/ p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK); if (p->p_oppid != p->p_pptr->p_pid) { PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); pp = proc_realparent(p); proc_reparent(p, pp); if (pp == initproc) p->p_sigparent = SIGCHLD; - CTR2(KTR_PTRACE, - "PT_DETACH: pid %d reparented to pid %d", - p->p_pid, pp->p_pid); + CTR3(KTR_PTRACE, + "PT_DETACH: pid %d reparented to pid %d, sig %d", + p->p_pid, pp->p_pid, data); } else - CTR1(KTR_PTRACE, "PT_DETACH: pid %d", p->p_pid); + CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d", + p->p_pid, data); p->p_oppid = 0; p->p_stops = 0; /* should we send SIGCHLD? */ /* childproc_continued(p); */ break; } sendsig: if (proctree_locked) { sx_xunlock(&proctree_lock); proctree_locked = 0; } p->p_xsig = data; p->p_xthread = NULL; if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) { /* deliver or queue signal */ td2->td_dbgflags &= ~TDB_XSIG; td2->td_xsig = data; if (req == PT_DETACH) { FOREACH_THREAD_IN_PROC(p, td3) td3->td_dbgflags &= ~TDB_SUSPEND; } /* * unsuspend all threads, to not let a thread run, * you should use PT_SUSPEND to suspend it before * continuing process. */ PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); if (req == PT_ATTACH) kern_psignal(p, data); } else { if (data) kern_psignal(p, data); } break; case PT_WRITE_I: case PT_WRITE_D: td2->td_dbgflags |= TDB_USERWR; write = 1; /* FALLTHROUGH */ case PT_READ_I: case PT_READ_D: PROC_UNLOCK(p); tmp = 0; /* write = 0 set above */ iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp; iov.iov_len = sizeof(int); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(uintptr_t)addr; uio.uio_resid = sizeof(int); uio.uio_segflg = UIO_SYSSPACE; /* i.e.: the uap */ uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_td = td; error = proc_rwmem(p, &uio); if (uio.uio_resid != 0) { /* * XXX proc_rwmem() doesn't currently return ENOSPC, * so I think write() can bogusly return 0. 
* XXX what happens for short writes? We don't want * to write partial data. * XXX proc_rwmem() returns EPERM for other invalid * addresses. Convert this to EINVAL. Does this * clobber returns of EPERM for other reasons? */ if (error == 0 || error == ENOSPC || error == EPERM) error = EINVAL; /* EOF */ } if (!write) td->td_retval[0] = tmp; if (error == 0) { if (write) CTR3(KTR_PTRACE, "PT_WRITE: pid %d: %p <= %#x", p->p_pid, addr, data); else CTR3(KTR_PTRACE, "PT_READ: pid %d: %p >= %#x", p->p_pid, addr, tmp); } PROC_LOCK(p); break; case PT_IO: #ifdef COMPAT_FREEBSD32 if (wrap32) { piod32 = addr; iov.iov_base = (void *)(uintptr_t)piod32->piod_addr; iov.iov_len = piod32->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod32->piod_offs; uio.uio_resid = piod32->piod_len; } else #endif { piod = addr; iov.iov_base = piod->piod_addr; iov.iov_len = piod->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs; uio.uio_resid = piod->piod_len; } uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; #ifdef COMPAT_FREEBSD32 tmp = wrap32 ? 
piod32->piod_op : piod->piod_op; #else tmp = piod->piod_op; #endif switch (tmp) { case PIOD_READ_D: case PIOD_READ_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: READ (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: CTR3(KTR_PTRACE, "PT_IO: pid %d: WRITE (%p, %#x)", p->p_pid, (uintptr_t)uio.uio_offset, uio.uio_resid); td2->td_dbgflags |= TDB_USERWR; uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; goto out; } PROC_UNLOCK(p); error = proc_rwmem(p, &uio); #ifdef COMPAT_FREEBSD32 if (wrap32) piod32->piod_len -= uio.uio_resid; else #endif piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; case PT_KILL: CTR1(KTR_PTRACE, "PT_KILL: pid %d", p->p_pid); data = SIGKILL; goto sendsig; /* in PT_CONTINUE above */ case PT_SETREGS: CTR2(KTR_PTRACE, "PT_SETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(regs, td2, addr); break; case PT_GETREGS: CTR2(KTR_PTRACE, "PT_GETREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(regs, td2, addr); break; case PT_SETFPREGS: CTR2(KTR_PTRACE, "PT_SETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(fpregs, td2, addr); break; case PT_GETFPREGS: CTR2(KTR_PTRACE, "PT_GETFPREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(fpregs, td2, addr); break; case PT_SETDBREGS: CTR2(KTR_PTRACE, "PT_SETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(dbregs, td2, addr); break; case PT_GETDBREGS: CTR2(KTR_PTRACE, "PT_GETDBREGS: tid %d (pid %d)", td2->td_tid, p->p_pid); error = PROC_READ(dbregs, td2, addr); break; case PT_LWPINFO: if (data <= 0 || #ifdef COMPAT_FREEBSD32 (!wrap32 && data > sizeof(*pl)) || (wrap32 && data > sizeof(*pl32))) { #else data > sizeof(*pl)) { #endif error = EINVAL; break; } #ifdef COMPAT_FREEBSD32 if (wrap32) { pl = &plr; pl32 = addr; } else #endif pl = addr; pl->pl_lwpid = 
td2->td_tid; pl->pl_event = PL_EVENT_NONE; pl->pl_flags = 0; if (td2->td_dbgflags & TDB_XSIG) { pl->pl_event = PL_EVENT_SIGNAL; if (td2->td_dbgksi.ksi_signo != 0 && #ifdef COMPAT_FREEBSD32 ((!wrap32 && data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo)) || (wrap32 && data >= offsetof(struct ptrace_lwpinfo32, pl_siginfo) + sizeof(struct siginfo32))) #else data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo) #endif ){ pl->pl_flags |= PL_FLAG_SI; pl->pl_siginfo = td2->td_dbgksi.ksi_info; } } if ((pl->pl_flags & PL_FLAG_SI) == 0) bzero(&pl->pl_siginfo, sizeof(pl->pl_siginfo)); if (td2->td_dbgflags & TDB_SCE) pl->pl_flags |= PL_FLAG_SCE; else if (td2->td_dbgflags & TDB_SCX) pl->pl_flags |= PL_FLAG_SCX; if (td2->td_dbgflags & TDB_EXEC) pl->pl_flags |= PL_FLAG_EXEC; if (td2->td_dbgflags & TDB_FORK) { pl->pl_flags |= PL_FLAG_FORKED; pl->pl_child_pid = td2->td_dbg_forked; } if (td2->td_dbgflags & TDB_CHILD) pl->pl_flags |= PL_FLAG_CHILD; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) { pl->pl_syscall_code = td2->td_dbg_sc_code; pl->pl_syscall_narg = td2->td_dbg_sc_narg; } else { pl->pl_syscall_code = 0; pl->pl_syscall_narg = 0; } #ifdef COMPAT_FREEBSD32 if (wrap32) ptrace_lwpinfo_to32(pl, pl32); #endif - CTR5(KTR_PTRACE, - "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d", + CTR6(KTR_PTRACE, + "PT_LWPINFO: tid %d (pid %d) event %d flags %#x child pid %d syscall %d", td2->td_tid, p->p_pid, pl->pl_event, pl->pl_flags, - pl->pl_child_pid); + pl->pl_child_pid, pl->pl_syscall_code); break; case PT_GETNUMLWPS: CTR2(KTR_PTRACE, "PT_GETNUMLWPS: pid %d: %d threads", p->p_pid, p->p_numthreads); td->td_retval[0] = p->p_numthreads; break; case PT_GETLWPLIST: CTR3(KTR_PTRACE, "PT_GETLWPLIST: pid %d: data %d, actual %d", p->p_pid, data, p->p_numthreads); if (data <= 0) { error = EINVAL; break; } num = 
imin(p->p_numthreads, data); PROC_UNLOCK(p); buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); if (!error) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_VM_TIMESTAMP: CTR2(KTR_PTRACE, "PT_VM_TIMESTAMP: pid %d: timestamp %d", p->p_pid, p->p_vmspace->vm_map.timestamp); td->td_retval[0] = p->p_vmspace->vm_map.timestamp; break; case PT_VM_ENTRY: PROC_UNLOCK(p); #ifdef COMPAT_FREEBSD32 if (wrap32) error = ptrace_vm_entry32(td, p, addr); else #endif error = ptrace_vm_entry(td, p, addr); PROC_LOCK(p); break; default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { PROC_UNLOCK(p); error = cpu_ptrace(td2, req, addr, data); PROC_LOCK(p); } else #endif /* Unknown request. */ error = EINVAL; break; } out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); return (error); } #undef PROC_READ #undef PROC_WRITE /* * Stop a process because of a debugging event; * stay stopped until p->p_step is cleared * (cleared by PIOCCONT in procfs). */ void stopevent(struct proc *p, unsigned int event, unsigned int val) { PROC_LOCK_ASSERT(p, MA_OWNED); p->p_step = 1; CTR3(KTR_PTRACE, "stopevent: pid %d event %u val %u", p->p_pid, event, val); do { if (event != S_EXIT) p->p_xsig = val; p->p_xthread = NULL; p->p_stype = event; /* Which event caused the stop? 
*/ wakeup(&p->p_stype); /* Wake up any PIOCWAIT'ing procs */ msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0); } while (p->p_step); } Index: projects/clang370-import/sys/kern/uipc_mbuf.c =================================================================== --- projects/clang370-import/sys/kern/uipc_mbuf.c (revision 288925) +++ projects/clang370-import/sys/kern/uipc_mbuf.c (revision 288926) @@ -1,2059 +1,2056 @@ /*- * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include "opt_mbuf_stress_test.h" #include "opt_mbuf_profiling.h" #include #include #include #include #include #include #include #include #include #include #include int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; #ifdef MBUF_STRESS_TEST int m_defragpackets; int m_defragbytes; int m_defraguseless; int m_defragfailure; int m_defragrandomfailures; #endif /* * sysctl(8) exported objects */ SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD, &max_linkhdr, 0, "Size of largest link layer header"); SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD, &max_protohdr, 0, "Size of largest protocol layer header"); SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD, &max_hdr, 0, "Size of largest link plus protocol header"); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD, &max_datalen, 0, "Minimum space left in mbuf after max_hdr"); #ifdef MBUF_STRESS_TEST SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, &m_defragpackets, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, &m_defragbytes, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, &m_defraguseless, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, &m_defragfailure, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, &m_defragrandomfailures, 0, ""); #endif /* * Ensure the correct size of various mbuf parameters. It could be off due * to compiler-induced padding and alignment artifacts. */ CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN); CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN); /* * mbuf data storage should be 64-bit aligned regardless of architectural * pointer size; check this is the case with and without a packet header. 
*/ CTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0); CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0); /* * While the specific values here don't matter too much (i.e., +/- a few * words), we do want to ensure that changes to these values are carefully * reasoned about and properly documented. This is especially the case as * network-protocol and device-driver modules encode these layouts, and must * be recompiled if the structures change. Check these values at compile time * against the ones documented in comments in mbuf.h. * * NB: Possibly they should be documented there via #define's and not just * comments. */ #if defined(__LP64__) CTASSERT(offsetof(struct mbuf, m_dat) == 32); CTASSERT(sizeof(struct pkthdr) == 56); CTASSERT(sizeof(struct m_ext) == 48); #else CTASSERT(offsetof(struct mbuf, m_dat) == 24); CTASSERT(sizeof(struct pkthdr) == 48); CTASSERT(sizeof(struct m_ext) == 28); #endif /* * Assert that the queue(3) macros produce code of the same size as an old * plain pointer does. */ #ifdef INVARIANTS static struct mbuf m_assertbuf; CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt)); #endif /* * m_get2() allocates minimum mbuf that would fit "size" argument. 
*/ struct mbuf * m_get2(int size, int how, short type, int flags) { struct mb_args args; struct mbuf *m, *n; args.flags = flags; args.type = type; if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) return (uma_zalloc_arg(zone_mbuf, &args, how)); if (size <= MCLBYTES) return (uma_zalloc_arg(zone_pack, &args, how)); if (size > MJUMPAGESIZE) return (NULL); m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); n = uma_zalloc_arg(zone_jumbop, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * m_getjcl() returns an mbuf with a cluster of the specified size attached. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ struct mbuf * m_getjcl(int how, short type, int flags, int size) { struct mb_args args; struct mbuf *m, *n; uma_zone_t zone; if (size == MCLBYTES) return m_getcl(how, type, flags); args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); zone = m_getzone(size); n = uma_zalloc_arg(zone, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an * existing mbuf chain is provided, then we will append the new chain * to the existing one but still return the top of the newly allocated * chain. */ struct mbuf * m_getm2(struct mbuf *m, int len, int how, short type, int flags) { struct mbuf *mb, *nm = NULL, *mtail = NULL; KASSERT(len >= 0, ("%s: len is < 0", __func__)); /* Validate flags. */ flags &= (M_PKTHDR | M_EOR); /* Packet header mbuf must be first in chain. */ if ((flags & M_PKTHDR) && m != NULL) flags &= ~M_PKTHDR; /* Loop and append maximum sized mbufs to the chain tail. 
*/ while (len > 0) { if (len > MCLBYTES) mb = m_getjcl(how, type, (flags & M_PKTHDR), MJUMPAGESIZE); else if (len >= MINCLSIZE) mb = m_getcl(how, type, (flags & M_PKTHDR)); else if (flags & M_PKTHDR) mb = m_gethdr(how, type); else mb = m_get(how, type); /* Fail the whole operation if one mbuf can't be allocated. */ if (mb == NULL) { if (nm != NULL) m_freem(nm); return (NULL); } /* Book keeping. */ len -= M_SIZE(mb); if (mtail != NULL) mtail->m_next = mb; else nm = mb; mtail = mb; flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ } if (flags & M_EOR) mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ /* If mbuf was supplied, append new chain to the end of it. */ if (m != NULL) { for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) ; mtail->m_next = nm; mtail->m_flags &= ~M_EOR; } else m = nm; return (m); } /* * Free an entire chain of mbufs and associated external buffers, if * applicable. */ void m_freem(struct mbuf *mb) { while (mb != NULL) mb = m_free(mb); } /*- * Configure a provided mbuf to refer to the provided external storage * buffer and setup a reference count for said buffer. If the setting * up of the reference count fails, the M_EXT bit will not be set. If * successfull, the M_EXT bit is set in the mbuf's flags. * * Arguments: * mb The existing mbuf to which to attach the provided buffer. * buf The address of the provided external storage buffer. * size The size of the provided buffer. * freef A pointer to a routine that is responsible for freeing the * provided external storage buffer. * args A pointer to an argument structure (of any type) to be passed * to the provided freef routine (may be NULL). * flags Any other flags to be passed to the provided mbuf. * type The type that the external storage buffer should be * labeled with. * * Returns: * Nothing. 
*/
int
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2,
    int flags, int type, int wait)
{
	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));

	/*
	 * For EXT_EXTREF no counter is allocated here; the ext_cnt pointer
	 * already present in the mbuf is checked and used as is.
	 */
	if (type != EXT_EXTREF)
		mb->m_ext.ext_cnt = uma_zalloc(zone_ext_refcnt, wait);

	if (mb->m_ext.ext_cnt == NULL)
		return (ENOMEM);

	/* The mbuf being set up holds the first reference. */
	*(mb->m_ext.ext_cnt) = 1;
	mb->m_flags |= (M_EXT | flags);
	mb->m_ext.ext_buf = buf;
	mb->m_data = mb->m_ext.ext_buf;
	mb->m_ext.ext_size = size;
	mb->m_ext.ext_free = freef;
	mb->m_ext.ext_arg1 = arg1;
	mb->m_ext.ext_arg2 = arg2;
	mb->m_ext.ext_type = type;
	mb->m_ext.ext_flags = 0;

	return (0);
}

/*
 * Non-directly-exported function to clean up after mbufs with M_EXT
 * storage attached to them if the reference count hits 1.
 */
void
mb_free_ext(struct mbuf *m)
{
	int freembuf;

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));

	/*
	 * Check if the header is embedded in the cluster.
	 */
	freembuf = (m->m_flags & M_NOFREE) ? 0 : 1;

	switch (m->m_ext.ext_type) {
	case EXT_SFBUF:
		/* sf_buf storage is released through its own routine. */
		sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
		break;
	default:
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		/*
		 * Free attached storage if this mbuf is the only
		 * reference to it.
		 */
		/*
		 * A count of exactly 1 skips the atomic decrement.
		 * Otherwise drop our reference and stop unless it was
		 * the last one.
		 */
		if (*(m->m_ext.ext_cnt) != 1) {
			if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1)
				break;
		}

		switch (m->m_ext.ext_type) {
		case EXT_PACKET:	/* The packet zone is special. */
			/* Restore the count to 1 before returning to the zone. */
			if (*(m->m_ext.ext_cnt) == 0)
				*(m->m_ext.ext_cnt) = 1;
			uma_zfree(zone_pack, m);
			return;		/* Job done.
*/
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			break;
		case EXT_NET_DRV:
		case EXT_MOD_TYPE:
		case EXT_DISPOSABLE:
			/* These types own a counter from zone_ext_refcnt. */
			*(m->m_ext.ext_cnt) = 0;
			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
				m->m_ext.ext_cnt));
			/* FALLTHROUGH */
		case EXT_EXTREF:
			KASSERT(m->m_ext.ext_free != NULL,
				("%s: ext_free not set", __func__));
			(*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1,
			    m->m_ext.ext_arg2);
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
				("%s: unknown ext_type", __func__));
		}
	}

	/* M_NOFREE mbufs are not returned to the mbuf zone. */
	if (freembuf)
		uma_zfree(zone_mbuf, m);
}

/*
 * Attach the cluster from *m to *n, set up m_ext in *n
 * and bump the refcount of the cluster.
 */
static void
mb_dupcl(struct mbuf *n, const struct mbuf *m)
{

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
	KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));

	switch (m->m_ext.ext_type) {
	case EXT_SFBUF:
		sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
		break;
	default:
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		/* A count of exactly 1 is bumped without an atomic operation. */
		if (*(m->m_ext.ext_cnt) == 1)
			*(m->m_ext.ext_cnt) += 1;
		else
			atomic_add_int(m->m_ext.ext_cnt, 1);
	}

	/* Share the whole m_ext description; propagate read-only status. */
	n->m_ext = m->m_ext;
	n->m_flags |= M_EXT;
	n->m_flags |= m->m_flags & M_RDONLY;
}

/*
 * Turn a packet header mbuf into a plain one: delete its tags, clear
 * M_PKTHDR and zero the pkthdr storage.
 */
void
m_demote_pkthdr(struct mbuf *m)
{

	M_ASSERTPKTHDR(m);

	m_tag_delete_chain(m, NULL);
	m->m_flags &= ~M_PKTHDR;
	bzero(&m->m_pkthdr, sizeof(struct pkthdr));
}

/*
 * Clean up mbuf (chain) from any tags and packet headers.
 * If "all" is set then the first mbuf in the chain will be
 * cleaned too.
 * Besides M_EXT, M_RDONLY and M_NOFREE only the bits named in "flags"
 * survive in each cleaned mbuf's m_flags.
 */
void
m_demote(struct mbuf *m0, int all, int flags)
{
	struct mbuf *m;

	for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
		KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p",
		    __func__, m, m0));
		if (m->m_flags & M_PKTHDR)
			m_demote_pkthdr(m);
		m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
	}
}

/*
 * Sanity checks on mbuf (chain) for use in KASSERT() and general
 * debugging.
 * A failed check runs M_SANITY_ACTION, which panics under INVARIANTS and
 * otherwise only prints a message.  If the function returns at all it
 * returns 1; it never returns 0.
 * With sanitize == 0 the repairable problems (stray m_nextpkt, tags or
 * packet headers inside the chain, wrong m_pkthdr.len) are reported
 * through M_SANITY_ACTION; with sanitize == 1 they are garbled or reset
 * instead so that they blow up later.
 */
int
m_sanity(struct mbuf *m0, int sanitize)
{
	struct mbuf *m;
	caddr_t a, b;
	int pktlen = 0;

#ifdef INVARIANTS
#define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m)
#else
#define	M_SANITY_ACTION(s)	printf("mbuf %p: " s, m)
#endif

	for (m = m0; m != NULL; m = m->m_next) {
		/*
		 * Basic pointer checks.  If any of these fails then some
		 * unrelated kernel memory before or after us is trashed.
		 * No way to recover from that.
		 */
		a = M_START(m);
		b = a + M_SIZE(m);
		if ((caddr_t)m->m_data < a)
			M_SANITY_ACTION("m_data outside mbuf data range left");
		if ((caddr_t)m->m_data > b)
			M_SANITY_ACTION("m_data outside mbuf data range right");
		if ((caddr_t)m->m_data + m->m_len > b)
			M_SANITY_ACTION("m_data + m_len exeeds mbuf space");

		/* m->m_nextpkt may only be set on first mbuf in chain. */
		if (m != m0 && m->m_nextpkt != NULL) {
			if (sanitize) {
				m_freem(m->m_nextpkt);
				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
			} else
				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
		}

		/* packet length (not mbuf length!) calculation */
		if (m0->m_flags & M_PKTHDR)
			pktlen += m->m_len;

		/* m_tags may only be attached to first mbuf in chain. */
		if (m != m0 && m->m_flags & M_PKTHDR &&
		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
			if (sanitize) {
				m_tag_delete_chain(m, NULL);
				/* put in 0xDEADC0DE perhaps?
*/ } else M_SANITY_ACTION("m_tags on in-chain mbuf"); } /* M_PKTHDR may only be set on first mbuf in chain */ if (m != m0 && m->m_flags & M_PKTHDR) { if (sanitize) { bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); m->m_flags &= ~M_PKTHDR; /* put in 0xDEADCODE and leave hdr flag in */ } else M_SANITY_ACTION("M_PKTHDR on in-chain mbuf"); } } m = m0; if (pktlen && pktlen != m->m_pkthdr.len) { if (sanitize) m->m_pkthdr.len = 0; else M_SANITY_ACTION("m_pkthdr.len != mbuf chain length"); } return 1; #undef M_SANITY_ACTION } /* * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. */ void m_move_pkthdr(struct mbuf *to, struct mbuf *from) { #if 0 /* see below for why these are not enabled */ M_ASSERTPKTHDR(to); /* Note: with MAC, this may not be a good assertion. */ KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_move_pkthdr: to has tags")); #endif #ifdef MAC /* * XXXMAC: It could be this should also occur for non-MAC? */ if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; /* especially tags */ SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ from->m_flags &= ~M_PKTHDR; } /* * Duplicate "from"'s mbuf pkthdr in "to". * "from" must have M_PKTHDR set, and "to" must be empty. * In particular, this does a deep copy of the packet tags. */ int m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how) { #if 0 /* * The mbuf allocator only initializes the pkthdr * when the mbuf is allocated with m_gethdr(). Many users * (e.g. m_copy*, m_prepend) use m_get() and then * smash the pkthdr as needed causing these * assertions to trip. For now just disable them. */ M_ASSERTPKTHDR(to); /* Note: with MAC, this may not be a good assertion. 
*/ KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); #endif MBUF_CHECKSLEEP(how); #ifdef MAC if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; SLIST_INIT(&to->m_pkthdr.tags); return (m_tag_copy_chain(to, from, how)); } /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(struct mbuf *m, int len, int how) { struct mbuf *mn; if (m->m_flags & M_PKTHDR) mn = m_gethdr(how, m->m_type); else mn = m_get(how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); } if (m->m_flags & M_PKTHDR) m_move_pkthdr(mn, m); mn->m_next = m; m = mn; if (len < M_SIZE(m)) M_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. 
*/
/*
 * Parameters:
 *   m    - source chain; it is not modified, but any external buffer it
 *          references gains a reference through mb_dupcl().
 *   off0 - byte offset into the chain at which the copy starts (>= 0).
 *   len  - number of bytes to copy (>= 0), or M_COPYALL.
 *   wait - allocation flag handed to m_get()/m_gethdr()/m_dup_pkthdr().
 *
 * Returns the head of the new chain.  NULL is returned when len is 0 or
 * when an allocation or the packet header duplication fails; in the
 * failure case everything copied so far is freed.
 */
struct mbuf *
m_copym(const struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	/* The packet header is only duplicated for a copy from offset 0. */
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	/* Advance to the mbuf that contains byte "off". */
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	/* np always addresses the link the next new mbuf has to fill. */
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			n = m_gethdr(wait, m->m_type);
		else
			n = m_get(wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			/* Share the external buffer instead of copying it. */
			n->m_data = m->m_data + off;
			mb_dupcl(n, m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}

	return (top);
nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies still have the room available.
*/
/*
 * "how" is handed unchanged to m_get() and m_dup_pkthdr().  Returns the
 * head of the copy, or NULL after freeing whatever had been built when
 * an allocation or the header duplication fails.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *tail, *fresh;

	MBUF_CHECKSLEEP(how);

	/* The first mbuf also receives a duplicate of the packet header. */
	top = m_get(how, m->m_type);
	if (top == NULL)
		goto nospace;
	tail = top;

	if (m_dup_pkthdr(tail, m, how) == 0)
		goto nospace;
	tail->m_len = m->m_len;
	if ((m->m_flags & M_EXT) != 0) {
		tail->m_data = m->m_data;
		mb_dupcl(tail, m);
	} else {
		/* Keep the same leading space the source mbuf had. */
		tail->m_data = tail->m_pktdat + (m->m_data - m->m_pktdat );
		bcopy(mtod(m, char *), mtod(tail, char *), tail->m_len);
	}

	/* Remaining mbufs: one new mbuf per source mbuf, appended in order. */
	for (m = m->m_next; m != NULL; m = m->m_next) {
		fresh = m_get(how, m->m_type);
		if (fresh == NULL)
			goto nospace;

		tail->m_next = fresh;
		tail = fresh;

		tail->m_len = m->m_len;
		if ((m->m_flags & M_EXT) != 0) {
			tail->m_data = m->m_data;
			mb_dupcl(tail, m);
		} else
			bcopy(mtod(m, char *), mtod(tail, char *),
			    tail->m_len);
	}
	return top;

nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Copy "len" bytes out of an mbuf chain into the flat buffer "cp",
 * beginning "off" bytes from the start of the chain.  The chain must
 * hold at least off + len bytes (asserted).
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int chunk;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));

	/* Step over whole mbufs that lie entirely before "off". */
	for (; off > 0; m = m->m_next) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
	}

	/* Only the first mbuf copied from is entered at a non-zero offset. */
	for (; len > 0; m = m->m_next) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		chunk = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, chunk);
		cp += chunk;
		len -= chunk;
		off = 0;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
*/ struct mbuf * m_dup(const struct mbuf *m, int how) { struct mbuf **p, *top = NULL; int remain, moff, nsize; MBUF_CHECKSLEEP(how); /* Sanity check */ if (m == NULL) return (NULL); M_ASSERTPKTHDR(m); /* While there's more data, get a new mbuf, tack it on, and fill it */ remain = m->m_pkthdr.len; moff = 0; p = ⊤ while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ struct mbuf *n; /* Get the next new mbuf */ if (remain >= MINCLSIZE) { n = m_getcl(how, m->m_type, 0); nsize = MCLBYTES; } else { n = m_get(how, m->m_type); nsize = MLEN; } if (n == NULL) goto nospace; if (top == NULL) { /* First one, must be PKTHDR */ if (!m_dup_pkthdr(n, m, how)) { m_free(n); goto nospace; } if ((n->m_flags & M_EXT) == 0) nsize = MHLEN; n->m_flags &= ~M_RDONLY; } n->m_len = 0; /* Link it into the new chain */ *p = n; p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ while (n->m_len < nsize && m != NULL) { int chunk = min(nsize - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; n->m_len += chunk; remain -= chunk; if (moff == m->m_len) { m = m->m_next; moff = 0; } } /* Check correct total mbuf length */ KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), ("%s: bogus m_pkthdr.len", __func__)); } return (top); nospace: m_freem(top); return (NULL); } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. */ void m_cat(struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; while (n) { if (!M_WRITABLE(m) || M_TRAILINGSPACE(m) < n->m_len) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } /* * Concatenate two pkthdr mbuf chains. 
*/
/*
 * Both chains must carry a packet header (asserted).  The length recorded
 * in n's header is added to m's, n is then passed through m_demote()
 * with "all" set, and the chains are joined with m_cat().
 */
void
m_catpkt(struct mbuf *m, struct mbuf *n)
{

	M_ASSERTPKTHDR(m);
	M_ASSERTPKTHDR(n);

	m->m_pkthdr.len += n->m_pkthdr.len;
	m_demote(n, 1, 0);

	m_cat(m, n);
}

/*
 * Trim data from the chain mp: req_len >= 0 removes that many bytes from
 * the head, a negative req_len removes -req_len bytes from the tail.
 * A NULL chain is ignored.  If the first mbuf has M_PKTHDR its
 * m_pkthdr.len is adjusted to match.
 *
 * Head trimming leaves emptied mbufs (m_len == 0) linked in the chain.
 * Tail trimming that reaches past the last mbuf frees every mbuf after
 * the one that holds the last remaining byte.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		/* len is now whatever could not be trimmed (chain too short). */
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
*/
/*
 * Parameters:
 *   n   - head of the chain; consumed on failure.
 *   len - number of leading bytes that must end up contiguous.  A request
 *         that needs a new mbuf fails when len exceeds MHLEN.
 *
 * Returns the (possibly new) head of the chain, or NULL after freeing the
 * chain when the allocation fails or the chain holds fewer than len bytes.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		/* m is the destination; n becomes the mbuf data is pulled from. */
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		m = m_get(M_NOWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		if (n->m_flags & M_PKTHDR)
			m_move_pkthdr(m, n);
	}
	/* Bytes still free behind the data currently held in m. */
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		  (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		/* Advance within n, or free it once it has been drained. */
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		/* Chain ran out before len bytes were gathered. */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	return (NULL);
}

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
*/
/*
 * NOTE(review): the lines prefixed with '-' below are deletion markers
 * carried over from the enclosing revision diff (that revision drops the
 * MSFail counter).  They are kept exactly as found.
 *
 * Parameters:
 *   n      - head of the chain; consumed on failure.
 *   len    - number of leading bytes to gather into the new mbuf; must
 *            not exceed MHLEN - dstoff.
 *   dstoff - empty space to leave in front of the data in the new mbuf.
 *
 * Returns the new head of the chain, or NULL after freeing the chain.
 */
-int MSFail;
-
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	m = m_get(M_NOWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	if (n->m_flags & M_PKTHDR)
		m_move_pkthdr(m, n);
	m->m_data += dstoff;
	/* Bytes still free behind the (so far empty) data area of m. */
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		/* Advance within n, or free it once it has been drained. */
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		/* Chain ran out before len bytes were gathered. */
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
 bad:
	m_freem(n);
-	MSFail++;
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf.  Use the
 * M_WRITABLE() macro to check for this case.
*/
/*
 * Parameters:
 *   m0   - head of the chain to split; keeps the first len0 bytes.
 *   len0 - number of bytes that stay with m0.
 *   wait - allocation flag handed to m_get()/m_gethdr().
 *
 * Returns the head of the tail chain, or NULL when the chain is shorter
 * than len0 or an allocation fails.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	/* Find the mbuf m holding the split point; len becomes its offset. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	/* Bytes of m that belong to the tail. */
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR && remain == 0) {
		/* Split falls on an mbuf boundary: tail gets a header mbuf. */
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;
		m->m_next = NULL;
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		return (n);
	} else if (m0->m_flags & M_PKTHDR) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			M_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			M_ALIGN(n, remain);
	} else if (remain == 0) {
		/* No header and a boundary split: just cut the link. */
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		n = m_get(wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	/* Give n the last "remain" bytes of m, by sharing or by copying. */
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
/*
 * Routine to copy from device local memory into mbufs.
 * Note that `off' argument is offset into first mbuf of target chain from
 * which to begin copying the data to.
*/
/*
 * Parameters:
 *   buf    - source of the data.
 *   totlen - number of bytes to copy; also stored as m_pkthdr.len.
 *   off    - offset into the first mbuf at which data is placed; must be
 *            within 0..MHLEN.
 *   ifp    - stored as m_pkthdr.rcvif of the first mbuf.
 *   copy   - optional copy routine; bcopy() is used when it is NULL.
 *
 * Returns the head of the new chain, or NULL when off is out of range,
 * totlen is not positive, or an allocation fails (a partially built
 * chain is freed).
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	/* mp always addresses the link the next new mbuf has to fill. */
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);

	while (totlen > 0) {
		if (top == NULL) {	/* First one, must be PKTHDR */
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
				len = MCLBYTES;
			} else {
				m = m_gethdr(M_NOWAIT, MT_DATA);
				len = MHLEN;

				/* Place initial small packet/header at end of mbuf */
				if (m && totlen + off + max_linkhdr <= MLEN) {
					m->m_data += max_linkhdr;
					len -= max_linkhdr;
				}
			}
			if (m == NULL)
				return NULL;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = totlen;
		} else {
			if (totlen + off >= MINCLSIZE) {
				m = m_getcl(M_NOWAIT, MT_DATA, 0);
				len = MCLBYTES;
			} else {
				m = m_get(M_NOWAIT, MT_DATA);
				len = MLEN;
			}
			if (m == NULL) {
				m_freem(top);
				return NULL;
			}
		}
		/* The offset only applies to the first mbuf filled. */
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
*/ void m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp) { int mlen; struct mbuf *m = m0, *n; int totlen = 0; if (m0 == NULL) return; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == NULL) { n = m_get(M_NOWAIT, m->m_type); if (n == NULL) goto out; bzero(mtod(n, caddr_t), MLEN); n->m_len = min(MLEN, len + off); m->m_next = n; } m = m->m_next; } while (len > 0) { if (m->m_next == NULL && (len > m->m_len - off)) { m->m_len += min(len - (m->m_len - off), M_TRAILINGSPACE(m)); } mlen = min (m->m_len - off, len); bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); cp += mlen; len -= mlen; mlen += off; off = 0; totlen += mlen; if (len == 0) break; if (m->m_next == NULL) { n = m_get(M_NOWAIT, m->m_type); if (n == NULL) break; n->m_len = min(MLEN, len); m->m_next = n; } m = m->m_next; } out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) m->m_pkthdr.len = totlen; } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * Return 1 if able to complete the job; otherwise 0. */ int m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space, remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. 
*/ n = m_get(M_NOWAIT, m->m_type); if (n == NULL) break; n->m_len = min(MLEN, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len, remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } /* * Apply function f to the data in an mbuf chain starting "off" bytes from * the beginning, continuing for "len" bytes. */ int m_apply(struct mbuf *m, int off, int len, int (*f)(void *, void *, u_int), void *arg) { u_int count; int rval; KASSERT(off >= 0, ("m_apply, negative off %d", off)); KASSERT(len >= 0, ("m_apply, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); count = min(m->m_len - off, len); rval = (*f)(arg, mtod(m, caddr_t) + off, count); if (rval) return (rval); len -= count; off = 0; m = m->m_next; } return (0); } /* * Return a pointer to mbuf/offset of location in mbuf chain. */ struct mbuf * m_getptr(struct mbuf *m, int loc, int *off) { while (loc >= 0) { /* Normal end of search. */ if (m->m_len > loc) { *off = loc; return (m); } else { loc -= m->m_len; if (m->m_next == NULL) { if (loc == 0) { /* Point at the end of valid data. */ *off = m->m_len; return (m); } return (NULL); } m = m->m_next; } } return (NULL); } void m_print(const struct mbuf *m, int maxlen) { int len; int pdata; const struct mbuf *m2; if (m == NULL) { printf("mbuf: %p\n", m); return; } if (m->m_flags & M_PKTHDR) len = m->m_pkthdr.len; else len = -1; m2 = m; while (m2 != NULL && (len == -1 || len)) { pdata = m2->m_len; if (maxlen != -1 && pdata > maxlen) pdata = maxlen; printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len, m2->m_next, m2->m_flags, "\20\20freelist\17skipfw" "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly" "\3eor\2pkthdr\1ext", pdata ? 
"" : "\n"); if (pdata) printf(", %*D\n", pdata, (u_char *)m2->m_data, "-"); if (len != -1) len -= m2->m_len; m2 = m2->m_next; } if (len > 0) printf("%d bytes unaccounted for.\n", len); return; } u_int m_fixhdr(struct mbuf *m0) { u_int len; len = m_length(m0, NULL); m0->m_pkthdr.len = len; return (len); } u_int m_length(struct mbuf *m0, struct mbuf **last) { struct mbuf *m; u_int len; len = 0; for (m = m0; m != NULL; m = m->m_next) { len += m->m_len; if (m->m_next == NULL) break; } if (last != NULL) *last = m; return (len); } /* * Defragment a mbuf chain, returning the shortest possible * chain of mbufs and clusters. If allocation fails and * this cannot be completed, NULL will be returned, but * the passed in chain will be unchanged. Upon success, * the original chain will be freed, and the new chain * will be returned. * * If a non-packet header is passed in, the original * mbuf (chain?) will be returned unharmed. */ struct mbuf * m_defrag(struct mbuf *m0, int how) { struct mbuf *m_new = NULL, *m_final = NULL; int progress = 0, length; MBUF_CHECKSLEEP(how); if (!(m0->m_flags & M_PKTHDR)) return (m0); m_fixhdr(m0); /* Needed sanity check */ #ifdef MBUF_STRESS_TEST if (m_defragrandomfailures) { int temp = arc4random() & 0xff; if (temp == 0xba) goto nospace; } #endif if (m0->m_pkthdr.len > MHLEN) m_final = m_getcl(how, MT_DATA, M_PKTHDR); else m_final = m_gethdr(how, MT_DATA); if (m_final == NULL) goto nospace; if (m_dup_pkthdr(m_final, m0, how) == 0) goto nospace; m_new = m_final; while (progress < m0->m_pkthdr.len) { length = m0->m_pkthdr.len - progress; if (length > MCLBYTES) length = MCLBYTES; if (m_new == NULL) { if (length > MLEN) m_new = m_getcl(how, MT_DATA, 0); else m_new = m_get(how, MT_DATA); if (m_new == NULL) goto nospace; } m_copydata(m0, progress, length, mtod(m_new, caddr_t)); progress += length; m_new->m_len = length; if (m_new != m_final) m_cat(m_final, m_new); m_new = NULL; } #ifdef MBUF_STRESS_TEST if (m0->m_next == NULL) m_defraguseless++; #endif 
m_freem(m0); m0 = m_final; #ifdef MBUF_STRESS_TEST m_defragpackets++; m_defragbytes += m0->m_pkthdr.len; #endif return (m0); nospace: #ifdef MBUF_STRESS_TEST m_defragfailure++; #endif if (m_final) m_freem(m_final); return (NULL); } /* * Defragment an mbuf chain, returning at most maxfrags separate * mbufs+clusters. If this is not possible NULL is returned and * the original mbuf chain is left in it's present (potentially * modified) state. We use two techniques: collapsing consecutive * mbufs and replacing consecutive mbufs by a cluster. * * NB: this should really be named m_defrag but that name is taken */ struct mbuf * m_collapse(struct mbuf *m0, int how, int maxfrags) { struct mbuf *m, *n, *n2, **prev; u_int curfrags; /* * Calculate the current number of frags. */ curfrags = 0; for (m = m0; m != NULL; m = m->m_next) curfrags++; /* * First, try to collapse mbufs. Note that we always collapse * towards the front so we don't need to deal with moving the * pkthdr. This may be suboptimal if the first mbuf has much * less data than the following. */ m = m0; again: for (;;) { n = m->m_next; if (n == NULL) break; if (M_WRITABLE(m) && n->m_len < M_TRAILINGSPACE(m)) { bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, n->m_len); m->m_len += n->m_len; m->m_next = n->m_next; m_free(n); if (--curfrags <= maxfrags) return m0; } else m = n; } KASSERT(maxfrags > 1, ("maxfrags %u, but normal collapse failed", maxfrags)); /* * Collapse consecutive mbufs to a cluster. 
*/ prev = &m0->m_next; /* NB: not the first mbuf */ while ((n = *prev) != NULL) { if ((n2 = n->m_next) != NULL && n->m_len + n2->m_len < MCLBYTES) { m = m_getcl(how, MT_DATA, 0); if (m == NULL) goto bad; bcopy(mtod(n, void *), mtod(m, void *), n->m_len); bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len, n2->m_len); m->m_len = n->m_len + n2->m_len; m->m_next = n2->m_next; *prev = m; m_free(n); m_free(n2); if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */ return m0; /* * Still not there, try the normal collapse * again before we allocate another cluster. */ goto again; } prev = &n->m_next; } /* * No place where we can collapse to a cluster; punt. * This can occur if, for example, you request 2 frags * but the packet requires that both be clusters (we * never reallocate the first mbuf to avoid moving the * packet header). */ bad: return NULL; } #ifdef MBUF_STRESS_TEST /* * Fragment an mbuf chain. There's no reason you'd ever want to do * this in normal usage, but it's great for stress testing various * mbuf consumers. * * If fragmentation is not possible, the original chain will be * returned. 
* * Possible length values: * 0 no fragmentation will occur * > 0 each fragment will be of the specified length * -1 each fragment will be the same random value in length * -2 each fragment's length will be entirely random * (Random values range from 1 to 256) */ struct mbuf * m_fragment(struct mbuf *m0, int how, int length) { struct mbuf *m_new = NULL, *m_final = NULL; int progress = 0; if (!(m0->m_flags & M_PKTHDR)) return (m0); if ((length == 0) || (length < -2)) return (m0); m_fixhdr(m0); /* Needed sanity check */ m_final = m_getcl(how, MT_DATA, M_PKTHDR); if (m_final == NULL) goto nospace; if (m_dup_pkthdr(m_final, m0, how) == 0) goto nospace; m_new = m_final; if (length == -1) length = 1 + (arc4random() & 255); while (progress < m0->m_pkthdr.len) { int fraglen; if (length > 0) fraglen = length; else fraglen = 1 + (arc4random() & 255); if (fraglen > m0->m_pkthdr.len - progress) fraglen = m0->m_pkthdr.len - progress; if (fraglen > MCLBYTES) fraglen = MCLBYTES; if (m_new == NULL) { m_new = m_getcl(how, MT_DATA, 0); if (m_new == NULL) goto nospace; } m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t)); progress += fraglen; m_new->m_len = fraglen; if (m_new != m_final) m_cat(m_final, m_new); m_new = NULL; } m_freem(m0); m0 = m_final; return (m0); nospace: if (m_final) m_freem(m_final); /* Return the original chain on failure */ return (m0); } #endif /* * Copy the contents of uio into a properly sized mbuf chain. */ struct mbuf * m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) { struct mbuf *m, *mb; int error, length; ssize_t total; int progress = 0; /* * len can be zero or an arbitrary large value bound by * the total data supplied by the uio. */ if (len > 0) total = min(uio->uio_resid, len); else total = uio->uio_resid; /* * The smallest unit returned by m_getm2() is a single mbuf * with pkthdr. We can't align past it. */ if (align >= MHLEN) return (NULL); /* * Give us the full allocation or nothing. 
* If len is zero return the smallest empty mbuf. */ m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags); if (m == NULL) return (NULL); m->m_data += align; /* Fill all mbufs with uio data and update header information. */ for (mb = m; mb != NULL; mb = mb->m_next) { length = min(M_TRAILINGSPACE(mb), total - progress); error = uiomove(mtod(mb, void *), length, uio); if (error) { m_freem(m); return (NULL); } mb->m_len = length; progress += length; if (flags & M_PKTHDR) m->m_pkthdr.len += length; } KASSERT(progress == total, ("%s: progress != total", __func__)); return (m); } /* * Copy an mbuf chain into a uio limited by len if set. */ int m_mbuftouio(struct uio *uio, struct mbuf *m, int len) { int error, length, total; int progress = 0; if (len > 0) total = min(uio->uio_resid, len); else total = uio->uio_resid; /* Fill the uio with data from the mbufs. */ for (; m != NULL; m = m->m_next) { length = min(m->m_len, total - progress); error = uiomove(mtod(m, void *), length, uio); if (error) return (error); progress += length; } return (0); } /* * Create a writable copy of the mbuf chain. While doing this * we compact the chain with a goal of producing a chain with * at most two mbufs. The second mbuf in this chain is likely * to be a cluster. The primary purpose of this work is to create * a writable packet for encryption, compression, etc. The * secondary goal is to linearize the data so the data can be * passed to crypto hardware in the most efficient manner possible. */ struct mbuf * m_unshare(struct mbuf *m0, int how) { struct mbuf *m, *mprev; struct mbuf *n, *mfirst, *mlast; int len, off; mprev = NULL; for (m = m0; m != NULL; m = mprev->m_next) { /* * Regular mbufs are ignored unless there's a cluster * in front of it that we can use to coalesce. We do * the latter mainly so later clusters can be coalesced * also w/o having to handle them specially (i.e. convert * mbuf+cluster -> cluster). 
This optimization is heavily * influenced by the assumption that we're running over * Ethernet where MCLBYTES is large enough that the max * packet size will permit lots of coalescing into a * single cluster. This in turn permits efficient * crypto operations, especially when using hardware. */ if ((m->m_flags & M_EXT) == 0) { if (mprev && (mprev->m_flags & M_EXT) && m->m_len <= M_TRAILINGSPACE(mprev)) { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ #if 0 newipsecstat.ips_mbcoalesced++; #endif } else { mprev = m; } continue; } /* * Writable mbufs are left alone (for now). */ if (M_WRITABLE(m)) { mprev = m; continue; } /* * Not writable, replace with a copy or coalesce with * the previous mbuf if possible (since we have to copy * it anyway, we try to reduce the number of mbufs and * clusters so that future work is easier). */ KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); /* NB: we only coalesce into a cluster or larger */ if (mprev != NULL && (mprev->m_flags & M_EXT) && m->m_len <= M_TRAILINGSPACE(mprev)) { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ #if 0 newipsecstat.ips_clcoalesced++; #endif continue; } /* * Allocate new space to hold the copy and copy the data. * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by * splitting them into clusters. We could just malloc a * buffer and make it external but too many device drivers * don't know how to break up the non-contiguous memory when * doing DMA. 
*/ n = m_getcl(how, m->m_type, m->m_flags); if (n == NULL) { m_freem(m0); return (NULL); } len = m->m_len; off = 0; mfirst = n; mlast = NULL; for (;;) { int cc = min(len, MCLBYTES); memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); n->m_len = cc; if (mlast != NULL) mlast->m_next = n; mlast = n; #if 0 newipsecstat.ips_clcopied++; #endif len -= cc; if (len <= 0) break; off += cc; n = m_getcl(how, m->m_type, m->m_flags); if (n == NULL) { m_freem(mfirst); m_freem(m0); return (NULL); } } n->m_next = m->m_next; if (mprev == NULL) m0 = mfirst; /* new head of chain */ else mprev->m_next = mfirst; /* replace old mbuf */ m_free(m); /* release old mbuf */ mprev = mfirst; } return (m0); } #ifdef MBUF_PROFILING #define MP_BUCKETS 32 /* don't just change this as things may overflow.*/ struct mbufprofile { uintmax_t wasted[MP_BUCKETS]; uintmax_t used[MP_BUCKETS]; uintmax_t segments[MP_BUCKETS]; } mbprof; #define MP_MAXDIGITS 21 /* strlen("16,000,000,000,000,000,000") == 21 */ #define MP_NUMLINES 6 #define MP_NUMSPERLINE 16 #define MP_EXTRABYTES 64 /* > strlen("used:\nwasted:\nsegments:\n") */ /* work out max space needed and add a bit of spare space too */ #define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE) #define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES) char mbprofbuf[MP_BUFSIZE]; void m_profile(struct mbuf *m) { int segments = 0; int used = 0; int wasted = 0; while (m) { segments++; used += m->m_len; if (m->m_flags & M_EXT) { wasted += MHLEN - sizeof(m->m_ext) + m->m_ext.ext_size - m->m_len; } else { if (m->m_flags & M_PKTHDR) wasted += MHLEN - m->m_len; else wasted += MLEN - m->m_len; } m = m->m_next; } /* be paranoid.. it helps */ if (segments > MP_BUCKETS - 1) segments = MP_BUCKETS - 1; if (used > 100000) used = 100000; if (wasted > 100000) wasted = 100000; /* store in the appropriate bucket */ /* don't bother locking. if it's slightly off, so what? 
*/ mbprof.segments[segments]++; mbprof.used[fls(used)]++; mbprof.wasted[fls(wasted)]++; } static void mbprof_textify(void) { int offset; char *c; uint64_t *p; p = &mbprof.wasted[0]; c = mbprofbuf; offset = snprintf(c, MP_MAXLINE + 10, "wasted:\n" "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #ifdef BIG_ARRAY p = &mbprof.wasted[16]; c += offset; offset = snprintf(c, MP_MAXLINE, "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #endif p = &mbprof.used[0]; c += offset; offset = snprintf(c, MP_MAXLINE + 10, "used:\n" "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #ifdef BIG_ARRAY p = &mbprof.used[16]; c += offset; offset = snprintf(c, MP_MAXLINE, "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #endif p = &mbprof.segments[0]; c += offset; offset = snprintf(c, MP_MAXLINE + 10, "segments:\n" "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %ju\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #ifdef BIG_ARRAY p = &mbprof.segments[16]; c += offset; offset = snprintf(c, MP_MAXLINE, "%ju %ju %ju %ju %ju %ju %ju %ju " "%ju %ju %ju %ju %ju %ju %ju %jju", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); #endif } static int mbprof_handler(SYSCTL_HANDLER_ARGS) { int error; mbprof_textify(); error = SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1); return (error); } static int mbprof_clr_handler(SYSCTL_HANDLER_ARGS) { int clear, error; clear = 0; error = 
sysctl_handle_int(oidp, &clear, 0, req); if (error || !req->newptr) return (error); if (clear) { bzero(&mbprof, sizeof(mbprof)); } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, mbprof_handler, "A", "mbuf profiling statistics"); SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW, NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics"); #endif Index: projects/clang370-import/sys/netinet/tcp_input.c =================================================================== --- projects/clang370-import/sys/netinet/tcp_input.c (revision 288925) +++ projects/clang370-import/sys/netinet/tcp_input.c (revision 288926) @@ -1,3715 +1,3725 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * Copyright (c) 2007-2008,2010 * Swinburne University of Technology, Melbourne, Australia. * Copyright (c) 2009-2010 Lawrence Stewart * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed at the Centre for Advanced Internet * Architectures, Swinburne University of Technology, by Lawrence Stewart, * James Healy and David Hayes, made possible in part by a grant from the Cisco * University Research Program Fund at Community Foundation Silicon Valley. * * Portions of this software were developed at the Centre for Advanced * Internet Architectures, Swinburne University of Technology, Melbourne, * Australia by David Hayes under sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipfw.h" /* for ipfw_fwd */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include /* for proc0 declaration */ #include #include #include #include #include #include #include #include #include /* before tcp_seq.h, for tcp_random18() */ #include #include #include #include #include #define TCPSTATES /* for logging */ #include #include #include #include #include #include #include #include /* required for icmp_var.h */ #include /* for ICMP_BANDLIM */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ #ifdef TCP_OFFLOAD #include #endif #ifdef IPSEC #include #include #endif /*IPSEC*/ #include #include const int tcprexmtthresh = 3; int tcp_log_in_vain = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, &tcp_log_in_vain, 0, "Log all incoming TCP segments to closed ports"); VNET_DEFINE(int, blackhole) = 0; #define V_blackhole VNET(blackhole) SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(blackhole), 0, "Do not send RST on segments to closed ports"); VNET_DEFINE(int, tcp_delack_enabled) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_delack_enabled), 0, "Delay ACK to try and piggyback it onto a data packet"); VNET_DEFINE(int, drop_synfin) = 0; #define V_drop_synfin VNET(drop_synfin) SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(drop_synfin), 0, "Drop TCP packets with SYN+FIN set"); VNET_DEFINE(int, tcp_do_rfc3042) = 1; #define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042) SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3042), 0, "Enable RFC 3042 (Limited Transmit)"); VNET_DEFINE(int, tcp_do_rfc3390) 
= 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3390), 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0, "Experimental TCP extensions"); VNET_DEFINE(int, tcp_do_initcwnd10) = 1; SYSCTL_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_initcwnd10), 0, "Enable RFC 6928 (Increasing initial CWND to 10)"); VNET_DEFINE(int, tcp_do_rfc3465) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3465), 0, "Enable RFC 3465 (Appropriate Byte Counting)"); VNET_DEFINE(int, tcp_abc_l_var) = 2; SYSCTL_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_abc_l_var), 2, "Cap the max cwnd increment during slow-start to this number of segments"); static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN"); VNET_DEFINE(int, tcp_do_ecn) = 0; SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0, "TCP ECN support"); VNET_DEFINE(int, tcp_ecn_maxretries) = 1; SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0, "Max retries before giving up on ECN"); VNET_DEFINE(int, tcp_insecure_syn) = 0; #define V_tcp_insecure_syn VNET(tcp_insecure_syn) SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_syn), 0, "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets"); VNET_DEFINE(int, tcp_insecure_rst) = 0; #define V_tcp_insecure_rst VNET(tcp_insecure_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_rst), 0, "Follow RFC793 instead of RFC5961 criteria for accepting RST packets"); VNET_DEFINE(int, tcp_recvspace) = 1024*64; #define V_tcp_recvspace VNET(tcp_recvspace) SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, 
recvspace, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size"); VNET_DEFINE(int, tcp_do_autorcvbuf) = 1; #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024; #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_inc), 0, "Incrementor step size of automatic receive buffer"); VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); VNET_DEFINE(struct inpcbhead, tcb); #define tcb6 tcb /* for KAME src sync over BSD*'s */ VNET_DEFINE(struct inpcbinfo, tcbinfo); static void tcp_dooptions(struct tcpopt *, u_char *, int, int); static void tcp_do_segment(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int); static void tcp_dropwithreset(struct mbuf *, struct tcphdr *, struct tcpcb *, int, int); static void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); static void tcp_xmit_timer(struct tcpcb *, int); static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type); static void inline cc_conn_init(struct tcpcb *tp); static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th); static void inline hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to); /* * TCP statistics are stored in an "array" of counter(9)s. 
*/ VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat); VNET_PCPUSTAT_SYSINIT(tcpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat, tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(tcpstat); #endif /* VIMAGE */ /* * Kernel module interface for updating tcpstat. The argument is an index * into tcpstat treated as an array. */ void kmod_tcpstat_inc(int statnum) { counter_u64_add(VNET(tcpstat)[statnum], 1); } /* * Wrapper for the TCP established input helper hook. */ static void inline hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) { struct tcp_hhook_data hhook_data; if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) { hhook_data.tp = tp; hhook_data.th = th; hhook_data.to = to; hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data, tp->osd); } } /* * CC wrapper hook functions */ static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type) { INP_WLOCK_ASSERT(tp->t_inpcb); tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); if (tp->snd_cwnd <= tp->snd_wnd) tp->ccv->flags |= CCF_CWND_LIMITED; else tp->ccv->flags &= ~CCF_CWND_LIMITED; if (type == CC_ACK) { if (tp->snd_cwnd > tp->snd_ssthresh) { tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, V_tcp_abc_l_var * tp->t_maxseg); if (tp->t_bytes_acked >= tp->snd_cwnd) { tp->t_bytes_acked -= tp->snd_cwnd; tp->ccv->flags |= CCF_ABC_SENTAWND; } } else { tp->ccv->flags &= ~CCF_ABC_SENTAWND; tp->t_bytes_acked = 0; } } if (CC_ALGO(tp)->ack_received != NULL) { /* XXXLAS: Find a way to live without this */ tp->ccv->curack = th->th_ack; CC_ALGO(tp)->ack_received(tp->ccv, type); } } static void inline cc_conn_init(struct tcpcb *tp) { struct hc_metrics_lite metrics; struct inpcb *inp = tp->t_inpcb; int rtt; INP_WLOCK_ASSERT(tp->t_inpcb); tcp_hc_get(&inp->inp_inc, &metrics); if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { tp->t_srtt = rtt; tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; 
TCPSTAT_INC(tcps_usedrtt); if (metrics.rmx_rttvar) { tp->t_rttvar = metrics.rmx_rttvar; TCPSTAT_INC(tcps_usedrttvar); } else { /* default variation is +- 1 rtt */ tp->t_rttvar = tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; } TCPT_RANGESET(tp->t_rxtcur, ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, tp->t_rttmin, TCPTV_REXMTMAX); } if (metrics.rmx_ssthresh) { /* * There's some sort of gateway or interface * buffer limit on the path. Use this to set * the slow start threshhold, but set the * threshold to no less than 2*mss. */ tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); TCPSTAT_INC(tcps_usedssthresh); } /* * Set the initial slow-start flight size. * * RFC5681 Section 3.1 specifies the default conservative values. * RFC3390 specifies slightly more aggressive values. * RFC6928 increases it to ten segments. * * If a SYN or SYN/ACK was lost and retransmitted, we have to * reduce the initial CWND to one segment as congestion is likely * requiring us to be cautious. */ if (tp->snd_cwnd == 1) tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ else if (V_tcp_do_initcwnd10) tp->snd_cwnd = min(10 * tp->t_maxseg, max(2 * tp->t_maxseg, 14600)); else if (V_tcp_do_rfc3390) tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); else { /* Per RFC5681 Section 3.1 */ if (tp->t_maxseg > 2190) tp->snd_cwnd = 2 * tp->t_maxseg; else if (tp->t_maxseg > 1095) tp->snd_cwnd = 3 * tp->t_maxseg; else tp->snd_cwnd = 4 * tp->t_maxseg; } if (CC_ALGO(tp)->conn_init != NULL) CC_ALGO(tp)->conn_init(tp->ccv); } void inline cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) { INP_WLOCK_ASSERT(tp->t_inpcb); switch(type) { case CC_NDUPACK: if (!IN_FASTRECOVERY(tp->t_flags)) { tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_ECN: if (!IN_CONGRECOVERY(tp->t_flags)) { TCPSTAT_INC(tcps_ecn_rcwnd); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case 
CC_RTO: tp->t_dupacks = 0; tp->t_bytes_acked = 0; EXIT_RECOVERY(tp->t_flags); tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg) * tp->t_maxseg; tp->snd_cwnd = tp->t_maxseg; break; case CC_RTO_ERR: TCPSTAT_INC(tcps_sndrexmitbad); /* RTO was unnecessary, so reset everything. */ tp->snd_cwnd = tp->snd_cwnd_prev; tp->snd_ssthresh = tp->snd_ssthresh_prev; tp->snd_recover = tp->snd_recover_prev; if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp->t_flags); if (tp->t_flags & TF_WASCRECOVERY) ENTER_CONGRECOVERY(tp->t_flags); tp->snd_nxt = tp->snd_max; tp->t_flags &= ~TF_PREVVALID; tp->t_badrxtwin = 0; break; } if (CC_ALGO(tp)->cong_signal != NULL) { if (th != NULL) tp->ccv->curack = th->th_ack; CC_ALGO(tp)->cong_signal(tp->ccv, type); } } static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th) { INP_WLOCK_ASSERT(tp->t_inpcb); /* XXXLAS: KASSERT that we're in recovery? */ if (CC_ALGO(tp)->post_recovery != NULL) { tp->ccv->curack = th->th_ack; CC_ALGO(tp)->post_recovery(tp->ccv); } /* XXXLAS: EXIT_RECOVERY ? */ tp->t_bytes_acked = 0; } #ifdef TCP_SIGNATURE static inline int tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen, struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) { int ret; tcp_fields_to_net(th); ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag); tcp_fields_to_host(th); return (ret); } #endif /* * Indicate whether this ack should be delayed. We can delay the ack if * following conditions are met: * - There is no delayed ack timer in progress. * - Our last ack wasn't a 0-sized window. We never want to delay * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. We make sure by checking that the * segment size is not larger than the MSS. * - Delayed acks are enabled or this is a half-synchronized T/TCP * connection. 
*/ #define DELAY_ACK(tp, tlen) \ ((!tcp_timer_active(tp, TT_DELACK) && \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ (tlen <= tp->t_maxopd) && \ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) static void inline cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) { INP_WLOCK_ASSERT(tp->t_inpcb); if (CC_ALGO(tp)->ecnpkt_handler != NULL) { switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->ccv->flags |= CCF_IPHDR_CE; break; case IPTOS_ECN_ECT0: tp->ccv->flags &= ~CCF_IPHDR_CE; break; case IPTOS_ECN_ECT1: tp->ccv->flags &= ~CCF_IPHDR_CE; break; } if (th->th_flags & TH_CWR) tp->ccv->flags |= CCF_TCPHDR_CWR; else tp->ccv->flags &= ~CCF_TCPHDR_CWR; if (tp->t_flags & TF_DELACK) tp->ccv->flags |= CCF_DELACK; else tp->ccv->flags &= ~CCF_DELACK; CC_ALGO(tp)->ecnpkt_handler(tp->ccv); if (tp->ccv->flags & CCF_ACKNOW) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } } /* * TCP input handling is split into multiple parts: * tcp6_input is a thin wrapper around tcp_input for the extended * ip6_protox[] call format in ip6_input * tcp_input handles primary segment validation, inpcb lookup and * SYN processing on listen sockets * tcp_do_segment processes the ACK and text of the segment for * establishing, established and closing connections */ #ifdef INET6 int tcp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct in6_ifaddr *ia6; struct ip6_hdr *ip6; IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE); /* * draft-itojun-ipv6-tcp-to-anycast * better place to put this in? 
*/ ip6 = mtod(m, struct ip6_hdr *); ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { struct ip6_hdr *ip6; ifa_free(&ia6->ia_ifa); ip6 = mtod(m, struct ip6_hdr *); icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); return (IPPROTO_DONE); } if (ia6) ifa_free(&ia6->ia_ifa); return (tcp_input(mp, offp, proto)); } #endif /* INET6 */ int tcp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct tcphdr *th = NULL; struct ip *ip = NULL; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; struct socket *so = NULL; u_char *optp = NULL; int off0; int optlen = 0; #ifdef INET int len; #endif int tlen = 0, off; int drop_hdrlen; int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ #ifdef TCP_SIGNATURE uint8_t sig_checked = 0; #endif uint8_t iptos = 0; struct m_tag *fwd_tag = NULL; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6; #else const void *ip6 = NULL; #endif /* INET6 */ struct tcpopt to; /* options in this segment */ char *s = NULL; /* address and port logging */ int ti_locked; #define TI_UNLOCKED 1 #define TI_RLOCKED 2 #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif off0 = *offp; m = *mp; *mp = NULL; to.to_flags = 0; TCPSTAT_INC(tcps_rcvtotal); #ifdef INET6 if (isipv6) { /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). 
*/ if (m->m_len < (sizeof(*ip6) + sizeof(*th))) { m = m_pullup(m, sizeof(*ip6) + sizeof(*th)); if (m == NULL) { TCPSTAT_INC(tcps_rcvshort); return (IPPROTO_DONE); } } ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; else th->th_sum = in6_cksum_pseudo(ip6, tlen, IPPROTO_TCP, m->m_pkthdr.csum_data); th->th_sum ^= 0xffff; } else th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); if (th->th_sum) { TCPSTAT_INC(tcps_rcvbadsum); goto drop; } /* * Be proactive about unspecified IPv6 address in source. * As we use all-zero to indicate unbounded/unconnected pcb, * unspecified IPv6 address can be used to confuse us. * * Note that packets with unspecified IPv6 destination is * already dropped in ip6_input. */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { /* XXX stat */ goto drop; } } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { /* * Get IP and TCP header together in first mbuf. * Note: IP leaves IP header in first mbuf. */ if (off0 > sizeof (struct ip)) { ip_stripoptions(m); off0 = sizeof(struct ip); } if (m->m_len < sizeof (struct tcpiphdr)) { if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == NULL) { TCPSTAT_INC(tcps_rcvshort); return (IPPROTO_DONE); } } ip = mtod(m, struct ip *); th = (struct tcphdr *)((caddr_t)ip + off0); tlen = ntohs(ip->ip_len) - off0; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; else th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + tlen + IPPROTO_TCP)); th->th_sum ^= 0xffff; } else { struct ipovly *ipov = (struct ipovly *)ip; /* * Checksum extended TCP header and data. 
*/ len = off0 + tlen; bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); ipov->ih_len = htons(tlen); th->th_sum = in_cksum(m, len); /* Reset length for SDT probes. */ ip->ip_len = htons(tlen + off0); } if (th->th_sum) { TCPSTAT_INC(tcps_rcvbadsum); goto drop; } /* Re-initialization for later version check */ ip->ip_v = IPVERSION; } #endif /* INET */ #ifdef INET6 if (isipv6) iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET iptos = ip->ip_tos; #endif /* * Check that TCP offset makes sense, * pull out TCP options and adjust length. XXX */ off = th->th_off << 2; if (off < sizeof (struct tcphdr) || off > tlen) { TCPSTAT_INC(tcps_rcvbadoff); goto drop; } tlen -= off; /* tlen is used instead of ti->ti_len */ if (off > sizeof (struct tcphdr)) { #ifdef INET6 if (isipv6) { IP6_EXTHDR_CHECK(m, off0, off, IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { if (m->m_len < sizeof(struct ip) + off) { if ((m = m_pullup(m, sizeof (struct ip) + off)) == NULL) { TCPSTAT_INC(tcps_rcvshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); th = (struct tcphdr *)((caddr_t)ip + off0); } } #endif optlen = off - sizeof (struct tcphdr); optp = (u_char *)(th + 1); } thflags = th->th_flags; /* * Convert TCP protocol specific fields to host format. */ tcp_fields_to_host(th); /* * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options. */ drop_hdrlen = off0 + off; /* * Locate pcb for segment; if we're likely to add or remove a * connection then first acquire pcbinfo lock. There are three cases * where we might discover later we need a write lock despite the * flags: ACKs moving a connection out of the syncache, ACKs for a * connection in TIMEWAIT and SYNs not targeting a listening socket. 
*/ if ((thflags & (TH_FIN | TH_RST)) != 0) { INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; } else ti_locked = TI_UNLOCKED; /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ( #ifdef INET6 (isipv6 && (m->m_flags & M_IP6_NEXTHOP)) #ifdef INET || (!isipv6 && (m->m_flags & M_IP_NEXTHOP)) #endif #endif #if defined(INET) && !defined(INET6) (m->m_flags & M_IP_NEXTHOP) #endif ) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); findpcb: #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif #ifdef INET6 if (isipv6 && fwd_tag != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) : th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } } else if (isipv6) { inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET if (fwd_tag != NULL) { struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag+1); /* * Transparently forwarded. Pretend to be the destination. * already got one like this? */ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. 
* Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in_pcblookup(&V_tcbinfo, ip->ip_src, th->th_sport, next_hop->sin_addr, next_hop->sin_port ? ntohs(next_hop->sin_port) : th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } } else inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); #endif /* INET */ /* * If the INPCB does not exist then all data in the incoming * segment is discarded and an appropriate RST is sent back. * XXX MRT Send RST using which routing table? */ if (inp == NULL) { /* * Log communication attempts to ports that are not * in use. */ if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) || tcp_log_in_vain == 2) { if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6))) log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); } /* * When blackholing do not respond with a RST but * completely ignore the segment and drop it. */ if ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole == 2) goto dropunlock; rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } INP_WLOCK_ASSERT(inp); if ((inp->inp_flowtype == M_HASHTYPE_NONE) && (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) && ((inp->inp_socket == NULL) || (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) { inp->inp_flowid = m->m_pkthdr.flowid; inp->inp_flowtype = M_HASHTYPE_GET(m); } #ifdef IPSEC #ifdef INET6 if (isipv6 && ipsec6_in_reject(m, inp)) { goto dropunlock; } else #endif /* INET6 */ if (ipsec4_in_reject(m, inp) != 0) { goto dropunlock; } #endif /* IPSEC */ /* * Check the minimum TTL for socket. */ if (inp->inp_ip_minttl != 0) { #ifdef INET6 if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim) goto dropunlock; else #endif if (inp->inp_ip_minttl > ip->ip_ttl) goto dropunlock; } /* * A previous connection in TIMEWAIT state is supposed to catch stray * or duplicate segments arriving late. 
If this segment was a * legitimate new connection attempt, the old INPCB gets removed and * we can try again to find a listening socket. * * At this point, due to earlier optimism, we may hold only an inpcb * lock, and not the inpcbinfo write lock. If so, we need to try to * acquire it, or if that fails, acquire a reference on the inpcb, * drop all locks, acquire a global write lock, and then re-acquire * the inpcb lock. We may at that point discover that another thread * has tried to free the inpcb, in which case we need to loop back * and try to find a new inpcb to deliver to. * * XXXRW: It may be time to rethink timewait locking. */ relocked: if (inp->inp_flags & INP_TIMEWAIT) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } } else ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if (thflags & TH_SYN) tcp_dooptions(&to, optp, optlen, TO_SYN); /* * NB: tcp_twcheck unlocks the INP and frees the mbuf. */ if (tcp_twcheck(inp, &to, th, m, tlen)) goto findpcb; INP_INFO_RUNLOCK(&V_tcbinfo); return (IPPROTO_DONE); } /* * The TCPCB may no longer exist if the connection is winding * down or it is in the CLOSED state. Either way we drop the * segment and send an appropriate response. */ tp = intotcpcb(inp); if (tp == NULL || tp->t_state == TCPS_CLOSED) { rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_input(tp, m); m = NULL; /* consumed by the TOE driver */ goto dropunlock; } #endif /* * We've identified a valid inpcb, but it could be that we need an * inpcbinfo write lock but don't hold it. In this case, attempt to * acquire using the same strategy as the TIMEWAIT case above. If we * relock, we have to jump back to 'relocked' as the connection might * now be in TIMEWAIT. 
*/ #ifdef INVARIANTS if ((thflags & (TH_FIN | TH_RST)) != 0) INP_INFO_RLOCK_ASSERT(&V_tcbinfo); #endif if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } goto relocked; } else ti_locked = TI_RLOCKED; } INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } #ifdef MAC INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) goto dropunlock; #endif so = inp->inp_socket; KASSERT(so != NULL, ("%s: so == NULL", __func__)); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) { ostate = tp->t_state; #ifdef INET6 if (isipv6) { bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6)); } else #endif bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip)); tcp_savetcp = *th; } #endif /* TCPDEBUG */ /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection * attempt or the completion of a previous one. */ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " "tp not listening", __func__)); bzero(&inc, sizeof(inc)); #ifdef INET6 if (isipv6) { inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = ip6->ip6_src; inc.inc6_laddr = ip6->ip6_dst; } else #endif { inc.inc_faddr = ip->ip_src; inc.inc_laddr = ip->ip_dst; } inc.inc_fport = th->th_sport; inc.inc_lport = th->th_dport; inc.inc_fibnum = so->so_fibnum; /* * Check for an existing connection attempt in syncache if * the flag is only ACK. A successful lookup creates a new * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected * timestamp. 
*/ tcp_dooptions(&to, optp, optlen, 0); /* * NB: syncache_expand() doesn't unlock * inp and tcpinfo locks. */ if (!syncache_expand(&inc, &to, th, &so, m)) { /* * No syncache entry or ACK was not * for our SYN/ACK. Send a RST. * NB: syncache did its own logging * of the failure cause. */ rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } if (so == NULL) { /* * We completed the 3-way handshake * but could not allocate a socket * either due to memory shortage, * listen queue length limits or * global socket limits. Send RST * or wait and have the remote end * retransmit the ACK for another * try. */ if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Socket allocation failed due to " "limits or memory shortage, %s\n", s, __func__, V_tcp_sc_rst_sock_fail ? "sending RST" : "try again"); if (V_tcp_sc_rst_sock_fail) { rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } else goto dropunlock; } /* * Socket is created in state SYN_RECEIVED. * Unlock the listen socket, lock the newly * created socket and update the tp variable. */ INP_WUNLOCK(inp); /* listen socket */ inp = sotoinpcb(so); /* * New connection inpcb is already locked by * syncache_expand(). */ INP_WLOCK_ASSERT(inp); tp = intotcpcb(inp); KASSERT(tp->t_state == TCPS_SYN_RECEIVED, ("%s: ", __func__)); #ifdef TCP_SIGNATURE if (sig_checked == 0) { tcp_dooptions(&to, optp, optlen, (thflags & TH_SYN) ? TO_SYN : 0); if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to, th, tp->t_flags)) { /* * In SYN_SENT state if it receives an * RST, it is allowed for further * processing. */ if ((thflags & TH_RST) == 0 || (tp->t_state == TCPS_SYN_SENT) == 0) goto dropunlock; } sig_checked = 1; } #endif /* * Process the segment and the data it * contains. tcp_do_segment() consumes * the mbuf chain and unlocks the inpcb. 
*/ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } /* * Segment flag validation for new connection attempts: * * Our (SYN|ACK) response was rejected. * Check with syncache and remove entry to prevent * retransmits. * * NB: syncache_chkrst does its own logging of failure * causes. */ if (thflags & TH_RST) { syncache_chkrst(&inc, th); goto dropunlock; } /* * We can't do anything without SYN. */ if ((thflags & TH_SYN) == 0) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN is missing, segment ignored\n", s, __func__); TCPSTAT_INC(tcps_badsyn); goto dropunlock; } /* * (SYN|ACK) is bogus on a listen socket. */ if (thflags & TH_ACK) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN|ACK invalid, segment rejected\n", s, __func__); syncache_badack(&inc); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } /* * If the drop_synfin option is enabled, drop all * segments with both the SYN and FIN bits set. * This prevents e.g. nmap from identifying the * TCP/IP stack. * XXX: Poor reasoning. nmap has other methods * and is constantly refining its stack detection * strategies. * XXX: This is a violation of the TCP specification * and was used by RFC1644. */ if ((thflags & TH_FIN) && V_drop_synfin) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN|FIN segment ignored (based on " "sysctl setting)\n", s, __func__); TCPSTAT_INC(tcps_badsyn); goto dropunlock; } /* * Segment's flags are (SYN) or (SYN|FIN). * * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored * as they do not affect the state of the TCP FSM. * The data pointed to by TH_URG and th_urp is ignored. 
*/ KASSERT((thflags & (TH_RST|TH_ACK)) == 0, ("%s: Listen socket: TH_RST or TH_ACK set", __func__)); KASSERT(thflags & (TH_SYN), ("%s: Listen socket: TH_SYN not set", __func__)); #ifdef INET6 /* * If deprecated address is forbidden, * we do not accept SYN to deprecated interface * address to prevent any new inbound connection from * getting established. * When we do not accept SYN, we send a TCP RST, * with deprecated source address (instead of dropping * it). We compromise it as it is much better for peer * to send a RST, and RST will be the final packet * for the exchange. * * If we do not forbid deprecated addresses, we accept * the SYN packet. RFC2462 does not suggest dropping * SYN in this case. * If we decipher RFC2462 5.5.4, it says like this: * 1. use of deprecated addr with existing * communication is okay - "SHOULD continue to be * used" * 2. use of it with new communication: * (2a) "SHOULD NOT be used if alternate address * with sufficient scope is available" * (2b) nothing mentioned otherwise. * Here we fall into (2b) case as we have no choice in * our source address selection - we must obey the peer. * * The wording in RFC2462 is confusing, and there are * multiple description text for deprecated address * handling - worse, they are not exactly the same. * I believe 5.5.4 is the best one, so we follow 5.5.4. */ if (isipv6 && !V_ip6_use_deprecated) { struct in6_ifaddr *ia6; ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 != NULL && (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { ifa_free(&ia6->ia_ifa); if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt to deprecated " "IPv6 address rejected\n", s, __func__); rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } if (ia6) ifa_free(&ia6->ia_ifa); } #endif /* INET6 */ /* * Basic sanity checks on incoming SYN requests: * Don't respond if the destination is a link layer * broadcast according to RFC1122 4.2.3.10, p. 104. 
* If it is from this socket it must be forged. * Don't respond if the source or destination is a * global or subnet broad- or multicast address. * Note that it is quite possible to receive unicast * link-layer packets with a broadcast IP address. Use * in_broadcast() to find them. */ if (m->m_flags & (M_BCAST|M_MCAST)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from broad- or multicast " "link layer address ignored\n", s, __func__); goto dropunlock; } #ifdef INET6 if (isipv6) { if (th->th_dport == th->th_sport && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt to/from self " "ignored\n", s, __func__); goto dropunlock; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to multicast " "address ignored\n", s, __func__); goto dropunlock; } } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { if (th->th_dport == th->th_sport && ip->ip_dst.s_addr == ip->ip_src.s_addr) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to self " "ignored\n", s, __func__); goto dropunlock; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to broad- " "or multicast address ignored\n", s, __func__); goto dropunlock; } } #endif /* * SYN appears to be valid. Create compressed TCP state * for syncache. 
*/ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); tcp_dooptions(&to, optp, optlen, TO_SYN); syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); /* * Entry added to syncache and mbuf consumed. * Only the listen socket is unlocked by syncache_add(). */ if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } else if (tp->t_state == TCPS_LISTEN) { /* * When a listen socket is torn down the SO_ACCEPTCONN * flag is removed first while connections are drained * from the accept queue in a unlock/lock cycle of the * ACCEPT_LOCK, opening a race condition allowing a SYN * attempt go through unhandled. */ goto dropunlock; } #ifdef TCP_SIGNATURE if (sig_checked == 0) { tcp_dooptions(&to, optp, optlen, (thflags & TH_SYN) ? TO_SYN : 0); if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to, th, tp->t_flags)) { /* * In SYN_SENT state if it receives an RST, it is * allowed for further processing. */ if ((thflags & TH_RST) == 0 || (tp->t_state == TCPS_SYN_SENT) == 0) goto dropunlock; } sig_checked = 1; } #endif TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th); /* * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later * state. tcp_do_segment() always consumes the mbuf chain, unlocks * the inpcb, and unlocks pcbinfo. 
*/ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); dropwithreset: TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th); if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(inp); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); m = NULL; /* mbuf chain got consumed. */ goto drop; dropunlock: if (m != NULL) TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th); if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif if (inp != NULL) INP_WUNLOCK(inp); drop: INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); if (s != NULL) free(s, M_TCPLOG); if (m != NULL) m_freem(m); return (IPPROTO_DONE); } static void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, int ti_locked) { int thflags, acked, ourfinisacked, needoutput = 0; int rstreason, todrop, win; u_long tiwin; char *s; struct in_conninfo *inc; struct mbuf *mfree; struct tcpopt to; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif thflags = th->th_flags; inc = &tp->t_inpcb->inp_inc; tp->sackhint.last_sack_ack = 0; /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. 
Otherwise, we * allow the tcbinfo to be in either alocked or unlocked, as the * caller may have unnecessarily acquired a write lock due to a race. */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) INP_INFO_RLOCK_ASSERT(&V_tcbinfo); else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); /* * Segment received on connection. * Reset idle time and keep-alive timer. * XXX: This should be done after segment * validation to ignore broken/spoofed segs. */ tp->t_rcvtime = ticks; if (TCPS_HAVEESTABLISHED(tp->t_state)) tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); /* * Scale up the window into a 32-bit value. * For the SYN_SENT state the scale is zero. */ tiwin = th->th_win << tp->snd_scale; /* * TCP ECN processing. */ if (tp->t_flags & TF_ECN_PERMIT) { if (thflags & TH_CWR) tp->t_flags &= ~TF_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: TCPSTAT_INC(tcps_ecn_ect1); break; } /* Process a packet differently from RFC3168. */ cc_ecnpkt_handler(tp, th, iptos); /* Congestion experienced. */ if (thflags & TH_ECE) { cc_cong_signal(tp, th, CC_ECN); } } /* * Parse options on any incoming segment. */ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. 
Normalize * timestamp if syncookies were used when this connection * was established. */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) to.to_tsecr = 0; } /* * If timestamps were negotiated during SYN/ACK they should * appear on every segment during this session and vice versa. */ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp missing, " "no action\n", s, __func__); free(s, M_TCPLOG); } } if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Timestamp not expected, " "no action\n", s, __func__); free(s, M_TCPLOG); } } /* * Process options only when we get SYN/ACK back. The SYN case * for incoming connections is handled in tcp_syncache. * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. * XXX this is traditional behavior, may need to be cleaned up. */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; } /* * Initial send window. It will be updated with * the next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_ts_getticks(); } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; } /* * Header prediction: check for the two common cases * of a uni-directional data xfer. If the packet has * no control flags, is in-sequence, the window didn't * change and we're not retransmitting, it's a * candidate. If the length is zero and the ack moved * forward, we're the sender side of the xfer. 
Just * free the data acked & wake any higher level process * that was blocked waiting for space. If the length * is non-zero and the ack didn't move, we're the * receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to * the socket buffer and note that we need a delayed ack. * Make sure that the hidden state-flags are also off. * Since we check for TCPS_ESTABLISHED first, it can only * be TH_NEEDSYN. */ if (tp->t_state == TCPS_ESTABLISHED && th->th_seq == tp->rcv_nxt && (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && tp->snd_nxt == tp->snd_max && tiwin && tiwin == tp->snd_wnd && ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && LIST_EMPTY(&tp->t_segq) && ((to.to_flags & TOF_TS) == 0 || TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { /* * If last ACK falls within this segment's sequence numbers, * record the timestamp. * NOTE that the test is modified according to the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to.to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to.to_tsval; } if (tlen == 0) { if (SEQ_GT(th->th_ack, tp->snd_una) && SEQ_LEQ(th->th_ack, tp->snd_max) && !IN_RECOVERY(tp->t_flags) && (to.to_flags & TOF_SACK) == 0 && TAILQ_EMPTY(&tp->snd_holes)) { /* * This is a pure ack for outstanding data. */ if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; TCPSTAT_INC(tcps_predack); /* * "bad retransmit" recovery. */ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) { cc_cong_signal(tp, th, CC_RTO_ERR); } /* * Recalculate the transmit timer / rtt. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. 
*/ if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { u_int t; t = tcp_ts_getticks() - to.to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } acked = BYTES_THIS_ACK(tp, th); /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, &to); TCPSTAT_INC(tcps_rcvackpack); TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); if (SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* * Let the congestion control algorithm update * congestion control related information. This * typically means increasing the congestion * window. */ cc_ack_received(tp, th, CC_ACK); tp->snd_una = th->th_ack; /* * Pull snd_wl2 up to prevent seq wrap relative * to th_ack. */ tp->snd_wl2 = th->th_ack; tp->t_dupacks = 0; m_freem(m); /* * If all outstanding data are acked, stop * retransmit timer, otherwise restart timer * using current (possibly backed-off) value. * If process is waiting for space, * wakeup/selwakeup/signal. If data * are ready to send, let tcp_output * decide between more output or persist. 
*/ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); if (tp->snd_una == tp->snd_max) tcp_timer_activate(tp, TT_REXMT, 0); else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); sowwakeup(so); if (sbavail(&so->so_snd)) (void) tcp_output(tp); goto check_delack; } } else if (th->th_ack == tp->snd_una && tlen <= sbspace(&so->so_rcv)) { int newsize = 0; /* automatic sockbuf scaling */ /* * This is a pure, in-sequence data packet with * nothing on the reassembly queue and we have enough * buffer space to take it. */ if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; /* Clean receiver SACK report if present */ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) tcp_clean_sackreport(tp); TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; /* * Pull snd_wl1 up to prevent seq wrap relative to * th_seq. */ tp->snd_wl1 = th->th_seq; /* * Pull rcv_up up to prevent seq wrap relative to * rcv_nxt. */ tp->rcv_up = tp->rcv_nxt; TCPSTAT_INC(tcps_rcvpack); TCPSTAT_ADD(tcps_rcvbyte, tlen); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); /* * Automatic sizing of receive socket buffer. Often the send * buffer size is not optimally adjusted to the actual network * conditions at hand (delay bandwidth product). Setting the * buffer size too small limits throughput on links with high * bandwidth and high delay (eg. trans-continental/oceanic links). * * On the receive side the socket buffer memory is only rarely * used to any significant extent. This allows us to be much * more aggressive in scaling the receive socket buffer. 
For * the case that the buffer space is actually used to a large * extent and we run out of kernel memory we can simply drop * the new segments; TCP on the sender will just retransmit it * later. Setting the buffer size too big may only consume too * much kernel memory if the application doesn't read() from * the socket or packet loss or reordering makes use of the * reassembly queue. * * The criteria to step up the receive buffer one notch are: * 1. Application has not set receive buffer size with * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. * 2. the number of bytes received during the time it takes * one timestamp to be reflected back to us (the RTT); * 3. received bytes per RTT is within seven eighth of the * current socket buffer size; * 4. receive buffer size has not hit maximal automatic size; * * This algorithm does one step per RTT at most and only if * we receive a bulk stream w/o packet losses or reorderings. * Shrinking the buffer during idle times is not necessary as * it doesn't consume any memory when idle. * * TODO: Only step up if the application is actually serving * the buffer to better manage the socket buffer resources. */ if (V_tcp_do_autorcvbuf && (to.to_flags & TOF_TS) && to.to_tsecr && (so->so_rcv.sb_flags & SB_AUTOSIZE)) { if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && to.to_tsecr - tp->rfbuf_ts < hz) { if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) && so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { newsize = min(so->so_rcv.sb_hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); } /* Start over with next RTT. */ tp->rfbuf_ts = 0; tp->rfbuf_cnt = 0; } else tp->rfbuf_cnt += tlen; /* add up */ } /* Add data to socket buffer. */ SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { /* * Set new socket buffer size. * Give up when limit is reached. 
*/ if (newsize) if (!sbreserve_locked(&so->so_rcv, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); } /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); if (DELAY_ACK(tp, tlen)) { tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; tcp_output(tp); } goto check_delack; } } /* * Calculate amount of space in receive window, * and then do TCP input processing. * Receive window is amount of space in rcv queue, * but not less than advertised window. */ win = sbspace(&so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); /* Reset receive buffer auto scaling when not in bulk receive mode. */ tp->rfbuf_ts = 0; tp->rfbuf_cnt = 0; switch (tp->t_state) { /* * If the state is SYN_RECEIVED: * if seg contains an ACK, but not for our SYN/ACK, send a RST. */ case TCPS_SYN_RECEIVED: if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } break; /* * If the state is SYN_SENT: * if seg contains an ACK, but not for our SYN, drop the input. * if seg contains a RST, then drop the connection. * if seg does not contain SYN, then drop it. * Otherwise this is an acceptable SYN segment * initialize tp->rcv_nxt and tp->irs * if seg contains ack then advance tp->snd_una * if seg contains an ECE and ECN support is enabled, the stream * is ECN capable. 
* if SYN has been acked change to ESTABLISHED else SYN_RCVD state * arrange for segment to be acked (eventually) * continue processing rest of data/controls, beginning with URG */ case TCPS_SYN_SENT: if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, mtod(m, const char *), tp, th); tp = tcp_drop(tp, ECONNREFUSED); } if (thflags & TH_RST) goto drop; if (!(thflags & TH_SYN)) goto drop; tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; } tp->rcv_adv += imin(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ /* * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ if (DELAY_ACK(tp, tlen) && tlen != 0) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); else tp->t_flags |= TF_ACKNOW; if ((thflags & TH_ECE) && V_tcp_do_ecn) { tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } /* * Received in SYN_SENT[*] state. * Transitions: * SYN_SENT --> ESTABLISHED * SYN_SENT* --> FIN_WAIT_1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(connect__established, NULL, tp, mtod(m, const char *), tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } } else { /* * Received initial SYN in SYN-SENT[*] state => * simultaneous open. * If it succeeds, connection is * half-synchronized. 
* Otherwise, do 3-way handshake: * SYN-SENT -> SYN-RECEIVED * SYN-SENT* -> SYN-RECEIVED* */ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_timer_activate(tp, TT_REXMT, 0); tcp_state_change(tp, TCPS_SYN_RECEIVED); } KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " "ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* * Advance th->th_seq to correspond to first data byte. * If data, trim to stay within window, * dropping FIN if necessary. */ th->th_seq++; if (tlen > tp->rcv_wnd) { todrop = tlen - tp->rcv_wnd; m_adj(m, -todrop); tlen = tp->rcv_wnd; thflags &= ~TH_FIN; TCPSTAT_INC(tcps_rcvpackafterwin); TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); } tp->snd_wl1 = th->th_seq - 1; tp->rcv_up = th->th_seq; /* * Client side of transaction: already sent SYN and data. * If the remote host used T/TCP to validate the SYN, * our data will be ACK'd; if so, enter normal data segment * processing in the middle of step 5, ack processing. * Otherwise, goto step 6. */ if (thflags & TH_ACK) goto process_ACK; goto step6; /* * If the state is LAST_ACK or CLOSING or TIME_WAIT: * do normal processing. * * NB: Leftover from RFC1644 T/TCP. Cases to be reused later. */ case TCPS_LAST_ACK: case TCPS_CLOSING: break; /* continue normal processing */ } /* * States other than LISTEN or SYN_SENT. * First check the RST flag and sequence number since reset segments * are exempt from the timestamp and connection count tests. This * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix * below which allowed reset segments in half the sequence space * to fall though and be processed (which gives forged reset * segments with a random sequence number a 50 percent chance of * killing a connection). * Then check timestamp, if present. * Then check the connection count, if present. * Then check that at least some bytes of segment are within * receive window. 
If segment begins before rcv_nxt, * drop leading data (and SYN); if nothing left, just ack. */ if (thflags & TH_RST) { /* * RFC5961 Section 3.2 * * - RST drops connection only if SEG.SEQ == RCV.NXT. * - If RST is in window, we send challenge ACK. * * Note: to take into account delayed ACKs, we should * test against last_ack_sent instead of rcv_nxt. * Note 2: we handle special case of closed window, not * covered by the RFC. */ if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED, ("%s: TH_RST ti_locked %d, th %p tp %p", __func__, ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); if (V_tcp_insecure_rst || tp->last_ack_sent == th->th_seq) { TCPSTAT_INC(tcps_drops); /* Drop the connection. */ switch (tp->t_state) { case TCPS_SYN_RECEIVED: so->so_error = ECONNREFUSED; goto close; case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: so->so_error = ECONNRESET; close: tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ default: tp = tcp_close(tp); } } else { TCPSTAT_INC(tcps_badrst); /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; } } goto drop; } /* * RFC5961 Section 4.2 * Send challenge ACK for any SYN in synchronized state. */ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) { KASSERT(ti_locked == TI_RLOCKED, ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { tp = tcp_drop(tp, ECONNRESET); rstreason = BANDLIM_UNLIMITED; } else { /* Send challenge ACK. 
*/ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; } goto drop; } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { /* Check to see if ts_recent is over 24 days old. */ if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. If this segment updates * ts_recent, the age will be reset later and ts_recent * will get a valid value. If it does not, setting * ts_recent to zero will at least satisfy the * requirement that zero be placed in the timestamp * echo reply when ts_recent isn't valid. The * age isn't reset until we get a valid ts_recent * because we don't want out-of-order segments to be * dropped when ts_recent is old. */ tp->ts_recent = 0; } else { TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, tlen); TCPSTAT_INC(tcps_pawsdrop); if (tlen) goto dropafterack; goto drop; } } /* * In the SYN-RECEIVED state, validate that the packet belongs to * this connection before trimming the data to fit the receive * window. Check the sequence number versus IRS since we know * the sequence numbers haven't wrapped. This is a partial fix * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } todrop = tp->rcv_nxt - th->th_seq; if (todrop > 0) { if (thflags & TH_SYN) { thflags &= ~TH_SYN; th->th_seq++; if (th->th_urp > 1) th->th_urp--; else thflags &= ~TH_URG; todrop--; } /* * Following if statement from Stevens, vol. 2, p. 960. */ if (todrop > tlen || (todrop == tlen && (thflags & TH_FIN) == 0)) { /* * Any valid FIN must be to the left of the window. * At this point the FIN must be a duplicate or out * of sequence; drop it. */ thflags &= ~TH_FIN; /* * Send an ACK to resynchronize and drop any data. 
* But keep on processing for RST or ACK. */ tp->t_flags |= TF_ACKNOW; todrop = tlen; TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, todrop); } else { TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; if (th->th_urp > todrop) th->th_urp -= todrop; else { thflags &= ~TH_URG; th->th_urp = 0; } } /* * If new data are received on a connection after the * user processes are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data " "after socket was closed, " "sending RST and removing tcpcb\n", s, __func__, tcpstates[tp->t_state], tlen); free(s, M_TCPLOG); } tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); rstreason = BANDLIM_UNLIMITED; goto dropwithreset; } /* * If segment ends after window, drop trailing data * (and PUSH and FIN); if nothing left, just ACK. */ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); if (todrop > 0) { TCPSTAT_INC(tcps_rcvpackafterwin); if (todrop >= tlen) { TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from * incoming segments. Continue processing, but * remember to ack. Otherwise, drop segment * and ack. */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_rcvwinprobe); } else goto dropafterack; } else TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); m_adj(m, -todrop); tlen -= todrop; thflags &= ~(TH_PUSH|TH_FIN); } /* * If last ACK falls within this segment's sequence numbers, * record its timestamp. 
* NOTE: * 1) That the test incorporates suggestions from the latest * proposal of the tcplw@cray.com list (Braden 1993/04/26). * 2) That updating only on newer timestamps interferes with * our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. * 3) That we modify the segment boundary check to be * Last.ACK.Sent <= SEG.SEQ + SEG.Len * instead of RFC1323's * Last.ACK.Sent < SEG.SEQ + SEG.Len, * This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated * Vol. 2 p.869. In such cases, we can still calculate the * RTT correctly when RCV.NXT == Last.ACK.Sent. */ if ((to.to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN|TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to.to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN * flag is on (half-synchronized state), then queue data for * later processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_state == TCPS_SYN_RECEIVED || (tp->t_flags & TF_NEEDSYN)) goto step6; else if (tp->t_flags & TF_ACKNOW) goto dropafterack; else goto drop; } /* * Ack processing. */ switch (tp->t_state) { /* * In SYN_RECEIVED state, the ack ACKs our SYN, so enter * ESTABLISHED state and continue processing. * The ACK was checked above. */ case TCPS_SYN_RECEIVED: TCPSTAT_INC(tcps_connects); soisconnected(so); /* Do window scaling? 
*/ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_wnd = tiwin; } /* * Make transitions: * SYN-RECEIVED -> ESTABLISHED * SYN-RECEIVED* -> FIN-WAIT-1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, mtod(m, const char *), tp, th); cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user. */ if (tlen == 0 && (thflags & TH_FIN) == 0) (void) tcp_reass(tp, (struct tcphdr *)0, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; /* FALLTHROUGH */ /* * In ESTABLISHED state: drop duplicate ACKs; ACK out of range * ACKs. If the ack is in the range * tp->snd_una < th->th_ack <= tp->snd_max * then advance tp->snd_una to th->th_ack and drop * data from the retransmission queue. If this ACK reflects * more up to date window information we update our window information. */ case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: if (SEQ_GT(th->th_ack, tp->snd_max)) { TCPSTAT_INC(tcps_rcvacktoomuch); goto dropafterack; } if ((tp->t_flags & TF_SACK_PERMIT) && ((to.to_flags & TOF_SACK) || !TAILQ_EMPTY(&tp->snd_holes))) tcp_sack_doack(tp, &to, th->th_ack); /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, &to); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { /* * If this is the first time we've seen a * FIN from the remote, this is not a * duplicate and it needs to be processed * normally. This happens during a * simultaneous close. 
*/ if ((thflags & TH_FIN) && (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { tp->t_dupacks = 0; break; } TCPSTAT_INC(tcps_rcvdupack); /* * If we have outstanding data (other than * a window probe), this is a completely * duplicate ack (ie, window info didn't * change and FIN isn't set), * the ack is the biggest we've * seen and we've seen exactly our rexmt * threshhold of them, assume a packet * has been dropped and retransmit it. * Kludge snd_nxt & the congestion * window so we send only this one * packet. * * We know we're losing at the current * window size so do congestion avoidance * (set ssthresh to half the current window * and pull our congestion window back to * the new ssthresh). * * Dup acks mean that packets have left the * network (they're now cached at the receiver) * so bump cwnd by the amount in the receiver * to keep a constant cwnd packets in the * network. * * When using TCP ECN, notify the peer that * we reduced the cwnd. */ if (!tcp_timer_active(tp, TT_REXMT) || th->th_ack != tp->snd_una) tp->t_dupacks = 0; else if (++tp->t_dupacks > tcprexmtthresh || IN_FASTRECOVERY(tp->t_flags)) { cc_ack_received(tp, th, CC_DUPACK); if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags)) { int awnd; /* * Compute the amount of data in flight first. * We can inject new data into the pipe iff * we have less than 1/2 the original window's * worth of data in flight. */ awnd = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += tp->t_maxseg; if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; } } else tp->snd_cwnd += tp->t_maxseg; (void) tcp_output(tp); goto drop; } else if (tp->t_dupacks == tcprexmtthresh) { tcp_seq onxt = tp->snd_nxt; /* * If we're doing sack, check to * see if we're already in sack * recovery. If we're not doing sack, * check to see if we're in newreno * recovery. 
*/ if (tp->t_flags & TF_SACK_PERMIT) { if (IN_FASTRECOVERY(tp->t_flags)) { tp->t_dupacks = 0; break; } } else { if (SEQ_LEQ(th->th_ack, tp->snd_recover)) { tp->t_dupacks = 0; break; } } /* Congestion signal before ack. */ cc_cong_signal(tp, th, CC_NDUPACK); cc_ack_received(tp, th, CC_DUPACK); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; if (tp->t_flags & TF_SACK_PERMIT) { TCPSTAT_INC( tcps_sack_recovery_episode); tp->sack_newdata = tp->snd_nxt; tp->snd_cwnd = tp->t_maxseg; (void) tcp_output(tp); goto drop; } tp->snd_nxt = th->th_ack; tp->snd_cwnd = tp->t_maxseg; (void) tcp_output(tp); KASSERT(tp->snd_limited <= 2, ("%s: tp->snd_limited too big", __func__)); tp->snd_cwnd = tp->snd_ssthresh + tp->t_maxseg * (tp->t_dupacks - tp->snd_limited); if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; goto drop; } else if (V_tcp_do_rfc3042) { + /* + * Process first and second duplicate + * ACKs. Each indicates a segment + * leaving the network, creating room + * for more. Make sure we can send a + * packet on reception of each duplicate + * ACK by increasing snd_cwnd by one + * segment. Restore the original + * snd_cwnd after packet transmission. + */ cc_ack_received(tp, th, CC_DUPACK); u_long oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; u_int sent; int avail; KASSERT(tp->t_dupacks == 1 || tp->t_dupacks == 2, ("%s: dupacks not 1 or 2", __func__)); if (tp->t_dupacks == 1) tp->snd_limited = 0; tp->snd_cwnd = (tp->snd_nxt - tp->snd_una) + (tp->t_dupacks - tp->snd_limited) * tp->t_maxseg; /* * Only call tcp_output when there * is new data available to be sent. * Otherwise we would send pure ACKs. 
*/ SOCKBUF_LOCK(&so->so_snd); avail = sbavail(&so->so_snd) - (tp->snd_nxt - tp->snd_una); SOCKBUF_UNLOCK(&so->so_snd); if (avail > 0) (void) tcp_output(tp); sent = tp->snd_max - oldsndmax; if (sent > tp->t_maxseg) { KASSERT((tp->t_dupacks == 2 && tp->snd_limited == 0) || (sent == tp->t_maxseg + 1 && tp->t_flags & TF_SENTFIN), ("%s: sent too much", __func__)); tp->snd_limited = 2; } else if (sent > 0) ++tp->snd_limited; tp->snd_cwnd = oldcwnd; goto drop; } } else tp->t_dupacks = 0; break; } KASSERT(SEQ_GT(th->th_ack, tp->snd_una), ("%s: th_ack <= snd_una", __func__)); /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. */ if (IN_FASTRECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover)) { if (tp->t_flags & TF_SACK_PERMIT) tcp_sack_partialack(tp, th); else tcp_newreno_partial_ack(tp, th); } else cc_post_recovery(tp, th); } tp->t_dupacks = 0; /* * If we reach this point, ACK is not a duplicate, * i.e., it ACKs something we sent. */ if (tp->t_flags & TF_NEEDSYN) { /* * T/TCP: Connection was half-synchronized, and our * SYN has been ACK'd (so connection is now fully * synchronized). Go to non-starred state, * increment snd_una for ACK of SYN, and check if * we can do window scaling. */ tp->t_flags &= ~TF_NEEDSYN; tp->snd_una++; /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; /* Send window already scaled. */ } } process_ACK: INP_WLOCK_ASSERT(tp->t_inpcb); acked = BYTES_THIS_ACK(tp, th); TCPSTAT_INC(tcps_rcvackpack); TCPSTAT_ADD(tcps_rcvackbyte, acked); /* * If we just performed our first retransmit, and the ACK * arrives within our recovery window, then it was a mistake * to do the retransmit in the first place. Recover our * original cwnd and ssthresh, and proceed to transmit where * we left off. 
*/ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && (int)(ticks - tp->t_badrxtwin) < 0) cc_cong_signal(tp, th, CC_RTO_ERR); /* * If we have a timestamp reply, update smoothed * round trip time. If no timestamp is present but * transmit timer is running and timed sequence * number was acked, update smoothed round trip time. * Since we now have an rtt measurement, cancel the * timer backoff (cf., Phil Karn's retransmit alg.). * Recompute the initial retransmit timer. * * Some boxes send broken timestamp replies * during the SYN+ACK phase, ignore * timestamps of 0 or we could calculate a * huge RTT and blow up the retransmit timer. */ if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { u_int t; t = tcp_ts_getticks() - to.to_tsecr; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) tp->t_rttlow = ticks - tp->t_rtttime; tcp_xmit_timer(tp, ticks - tp->t_rtttime); } /* * If all outstanding data is acked, stop retransmit * timer and remember to restart (more output or persist). * If there is more data to be acked, restart retransmit * timer, using current (possibly backed-off) value. */ if (th->th_ack == tp->snd_max) { tcp_timer_activate(tp, TT_REXMT, 0); needoutput = 1; } else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); /* * If no data (only SYN) was ACK'd, * skip rest of ACK processing. */ if (acked == 0) goto step6; /* * Let the congestion control algorithm update congestion * control related information. This typically means increasing * the congestion window. 
*/ cc_ack_received(tp, th, CC_ACK); SOCKBUF_LOCK(&so->so_snd); if (acked > sbavail(&so->so_snd)) { tp->snd_wnd -= sbavail(&so->so_snd); mfree = sbcut_locked(&so->so_snd, (int)sbavail(&so->so_snd)); ourfinisacked = 1; } else { mfree = sbcut_locked(&so->so_snd, acked); tp->snd_wnd -= acked; ourfinisacked = 0; } /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); m_freem(mfree); /* Detect una wraparound. */ if (!IN_RECOVERY(tp->t_flags) && SEQ_GT(tp->snd_una, tp->snd_recover) && SEQ_LEQ(th->th_ack, tp->snd_recover)) tp->snd_recover = th->th_ack - 1; /* XXXLAS: Can this be moved up into cc_post_recovery? */ if (IN_RECOVERY(tp->t_flags) && SEQ_GEQ(th->th_ack, tp->snd_recover)) { EXIT_RECOVERY(tp->t_flags); } tp->snd_una = th->th_ack; if (tp->t_flags & TF_SACK_PERMIT) { if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; } if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; switch (tp->t_state) { /* * In FIN_WAIT_1 STATE in addition to the processing * for the ESTABLISHED state if our FIN is now acknowledged * then enter FIN_WAIT_2. */ case TCPS_FIN_WAIT_1: if (ourfinisacked) { /* * If we can't receive any more * data, then closing user can proceed. * Starting the timer is contrary to the * specification, but if we don't get a FIN * we'll hang forever. * * XXXjl: * we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } break; /* * In CLOSING STATE in addition to the processing for * the ESTABLISHED state if the ACK acknowledges our FIN * then enter the TIME-WAIT state, otherwise ignore * the segment. 
*/ case TCPS_CLOSING: if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); m_freem(m); return; } break; /* * In LAST_ACK, we may still be waiting for data to drain * and/or to be acked, as well as for the ack of our FIN. * If our FIN is now acknowledged, delete the TCB, * enter the closed state and return. */ case TCPS_LAST_ACK: if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); goto drop; } break; } } step6: INP_WLOCK_ASSERT(tp->t_inpcb); /* * Update window information. * Don't look at window if no ACK: TAC's send garbage on first SYN. */ if ((thflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ if (tlen == 0 && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; needoutput = 1; } /* * Process segments with URG. */ if ((thflags & TH_URG) && th->th_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * This is a kludge, but if we receive and accept * random urgent pointers, we'll crash in * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. */ SOCKBUF_LOCK(&so->so_rcv); if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ goto dodata; /* XXX */ } /* * If this segment advances the known urgent pointer, * then mark the data stream. This should not happen * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since * a FIN has been received from the remote side. * In these states we ignore the URG. * * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. 
We continue, however, * to consider it to indicate the first octet * of data past the urgent section as the original * spec states (in one of two places). */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; so->so_oobmark = sbavail(&so->so_rcv) + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SBS_RCVATMARK; sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } SOCKBUF_UNLOCK(&so->so_rcv); /* * Remove out of band data so doesn't get presented to user. * This can happen independent of advancing the URG pointer, * but if two URG's are pending at once, some out-of-band * data may creep in... ick. */ if (th->th_urp <= (u_long)tlen && !(so->so_options & SO_OOBINLINE)) { /* hdr drop is delayed */ tcp_pulloutofband(so, th, m, drop_hdrlen); } } else { /* * If no out of band data is expected, * pull receive urgent pointer along * with the receive window. */ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ INP_WLOCK_ASSERT(tp->t_inpcb); /* * Process the segment text, merging it into the TCP sequencing queue, * and arranging for acknowledgment of receipt if necessary. * This process logically involves adjusting tp->rcv_wnd as data * is presented to the user (this happens in tcp_usrreq.c, * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. */ if ((tlen || (thflags & TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly queue * with control block tp. Set thflags to whether reassembly now * includes a segment with FIN. This handles the common case * inline (segment is the next to be received on an established * connection, and the queue is empty), avoiding linkage into * and removal from the queue and repetition of various * conversions. 
* Set DELACK for segments received in order, but ack * immediately when segments are out of order (so * fast retransmit can work). */ if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq) && TCPS_HAVEESTABLISHED(tp->t_state)) { if (DELAY_ACK(tp, tlen)) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt += tlen; thflags = th->th_flags & TH_FIN; TCPSTAT_INC(tcps_rcvpack); TCPSTAT_ADD(tcps_rcvbyte, tlen); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else sbappendstream_locked(&so->so_rcv, m, 0); /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); } else { /* * XXX: Due to the header drop above "th" is * theoretically invalid by now. Fortunately * m_adj() doesn't actually frees any mbufs * when trimming from the head. */ thflags = tcp_reass(tp, th, &tlen, m); tp->t_flags |= TF_ACKNOW; } if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) tcp_update_sack_list(tp, save_start, save_start + tlen); #if 0 /* * Note the amount of data that peer has sent into * our window, in order to estimate the sender's * buffer size. * XXX: Unused. */ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); else len = so->so_rcv.sb_hiwat; #endif } else { m_freem(m); thflags &= ~TH_FIN; } /* * If FIN is received ACK the FIN and let the user know * that the connection is closing. */ if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); /* * If connection is half-synchronized * (ie NEEDSYN flag on) then delay ACK, * so it may be piggybacked when SYN is sent. * Otherwise, since we received a FIN then no * more input can be expected, send ACK now. */ if (tp->t_flags & TF_NEEDSYN) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt++; } switch (tp->t_state) { /* * In SYN_RECEIVED and ESTABLISHED STATES * enter the CLOSE_WAIT state. 
*/ case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_CLOSE_WAIT); break; /* * If still in FIN_WAIT_1 STATE FIN has not been acked so * enter the CLOSING state. */ case TCPS_FIN_WAIT_1: tcp_state_change(tp, TCPS_CLOSING); break; /* * In FIN_WAIT_2 state enter the TIME_WAIT state, * starting the time-wait timer, turning off the other * standard timers. */ case TCPS_FIN_WAIT_2: INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " "TCP_FIN_WAIT_2 ti_locked: %d", __func__, ti_locked)); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); return; } } if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); /* * Return any desired output. */ if (needoutput || (tp->t_flags & TF_ACKNOW)) (void) tcp_output(tp); check_delack: KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } INP_WUNLOCK(tp->t_inpcb); return; dropafterack: /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. * * We can now skip the test for the RST flag since all * paths to this code happen after packets containing * RST have been dropped. * * In the SYN-RECEIVED state, don't send an ACK unless the * segment we received passes the SYN-RECEIVED ACK test. * If it fails send a RST. This breaks the loop in the * "LAND" DoS attack, and also prevents an ACK storm * between two listening ports that have been sent forged * SYN segments, each with the source address of the other. 
*/ if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); INP_WUNLOCK(tp->t_inpcb); m_freem(m); return; dropwithreset: if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); return; drop: if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } #ifdef INVARIANTS else INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); #endif /* * Drop space held by incoming segment and return. */ #ifdef TCPDEBUG if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); m_freem(m); }

/*
 * Issue RST and make ACK acceptable to originator of segment.
 * The mbuf must still include the original packet header.
 * tp may be NULL.
 *
 * Parameters:
 *	m		mbuf chain holding the offending segment, IP header
 *			included.  It is disposed of on every path: either
 *			handed to tcp_respond() or freed at the drop label.
 *	th		TCP header of the offending segment.
 *	tp		control block, or NULL; when non-NULL its inpcb is
 *			asserted to be write-locked.
 *	tlen		segment data length; bumped by one when the segment
 *			carries SYN and no ACK.
 *	rstreason	passed to badport_bandlim() for rate limiting.
 *
 * No reset is generated when the segment itself carries RST, when the
 * mbuf is flagged M_BCAST or M_MCAST, when either IP address is multicast
 * (or, for IPv4, the source is INADDR_BROADCAST or in_broadcast() is true
 * for the destination), or when badport_bandlim() returns a negative value.
 */
static void
tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
    int tlen, int rstreason)
{
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	if (tp != NULL) {
		INP_WLOCK_ASSERT(tp->t_inpcb);
	}

	/*
	 * Never answer a RST with a RST, and don't bother if the mbuf is
	 * flagged as broadcast/multicast.
	 */
	if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
		goto drop;
#ifdef INET6
	/* The IP version field is read through a struct ip view of the header. */
	if (mtod(m, struct ip *)->ip_v == 6) {
		ip6 = mtod(m, struct ip6_hdr *);
		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
			goto drop;
		/* IPv6 anycast check is done at tcp6_input() */
	}
#endif
#if defined(INET) && defined(INET6)
	else
#endif
#ifdef INET
	{
		ip = mtod(m, struct ip *);
		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
		    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
			goto drop;
	}
#endif

	/* Perform bandwidth limiting. */
	if (badport_bandlim(rstreason) < 0)
		goto drop;

	/* tcp_respond consumes the mbuf chain. */
	if (th->th_flags & TH_ACK) {
		/* Segment had an ACK: reply with a bare RST built from th_ack. */
		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
		    th->th_ack, TH_RST);
	} else {
		/*
		 * No ACK on the segment: reply with RST|ACK built from
		 * th_seq + tlen, counting a SYN as one extra unit.
		 */
		if (th->th_flags & TH_SYN)
			tlen++;
		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
		    (tcp_seq)0, TH_RST|TH_ACK);
	}
	return;
drop:
	m_freem(m);
}

/*
 * Parse TCP options and place in tcpopt.
*/ static void tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags) { int opt, optlen; to->to_flags = 0; for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { if (cnt < 2) break; optlen = cp[1]; if (optlen < 2 || optlen > cnt) break; } switch (opt) { case TCPOPT_MAXSEG: if (optlen != TCPOLEN_MAXSEG) continue; if (!(flags & TO_SYN)) continue; to->to_flags |= TOF_MSS; bcopy((char *)cp + 2, (char *)&to->to_mss, sizeof(to->to_mss)); to->to_mss = ntohs(to->to_mss); break; case TCPOPT_WINDOW: if (optlen != TCPOLEN_WINDOW) continue; if (!(flags & TO_SYN)) continue; to->to_flags |= TOF_SCALE; to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT); break; case TCPOPT_TIMESTAMP: if (optlen != TCPOLEN_TIMESTAMP) continue; to->to_flags |= TOF_TS; bcopy((char *)cp + 2, (char *)&to->to_tsval, sizeof(to->to_tsval)); to->to_tsval = ntohl(to->to_tsval); bcopy((char *)cp + 6, (char *)&to->to_tsecr, sizeof(to->to_tsecr)); to->to_tsecr = ntohl(to->to_tsecr); break; #ifdef TCP_SIGNATURE /* * XXX In order to reply to a host which has set the * TCP_SIGNATURE option in its initial SYN, we have to * record the fact that the option was observed here * for the syncache code to perform the correct response. */ case TCPOPT_SIGNATURE: if (optlen != TCPOLEN_SIGNATURE) continue; to->to_flags |= TOF_SIGNATURE; to->to_signature = cp + 2; break; #endif case TCPOPT_SACK_PERMITTED: if (optlen != TCPOLEN_SACK_PERMITTED) continue; if (!(flags & TO_SYN)) continue; if (!V_tcp_do_sack) continue; to->to_flags |= TOF_SACKPERM; break; case TCPOPT_SACK: if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0) continue; if (flags & TO_SYN) continue; to->to_flags |= TOF_SACK; to->to_nsacks = (optlen - 2) / TCPOLEN_SACK; to->to_sacks = cp + 2; TCPSTAT_INC(tcps_sack_rcv_blocks); break; default: continue; } } } /* * Pull out of band byte out of a segment so * it doesn't appear in the user's data queue. 
* It is still reflected in the segment length for * sequencing purposes. */ static void tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m, int off) { int cnt = off + th->th_urp - 1; while (cnt >= 0) { if (m->m_len > cnt) { char *cp = mtod(m, caddr_t) + cnt; struct tcpcb *tp = sototcpcb(so); INP_WLOCK_ASSERT(tp->t_inpcb); tp->t_iobc = *cp; tp->t_oobflags |= TCPOOB_HAVEDATA; bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); m->m_len--; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len--; return; } cnt -= m->m_len; m = m->m_next; if (m == NULL) break; } panic("tcp_pulloutofband"); } /* * Collect new round-trip time estimate * and update averages and current timeout. */ static void tcp_xmit_timer(struct tcpcb *tp, int rtt) { int delta; INP_WLOCK_ASSERT(tp->t_inpcb); TCPSTAT_INC(tcps_rttupdated); tp->t_rttupdated++; if (tp->t_srtt != 0) { /* * srtt is stored as fixed point with 5 bits after the * binary point (i.e., scaled by 8). The following magic * is equivalent to the smoothing algorithm in rfc793 with * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed * point). Adjust rtt to origin 0. */ delta = ((rtt - 1) << TCP_DELTA_SHIFT) - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)); if ((tp->t_srtt += delta) <= 0) tp->t_srtt = 1; /* * We accumulate a smoothed rtt variance (actually, a * smoothed mean difference), then set the retransmit * timer to smoothed rtt + 4 times the smoothed variance. * rttvar is stored as fixed point with 4 bits after the * binary point (scaled by 16). The following is * equivalent to rfc793 smoothing with an alpha of .75 * (rttvar = rttvar*3/4 + |delta| / 4). This replaces * rfc793's wired-in beta. */ if (delta < 0) delta = -delta; delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT); if ((tp->t_rttvar += delta) <= 0) tp->t_rttvar = 1; if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar) tp->t_rttbest = tp->t_srtt + tp->t_rttvar; } else { /* * No rtt measurement yet - use the unsmoothed rtt. 
* Set the variance to half the rtt (so our first * retransmit happens at 3*rtt). */ tp->t_srtt = rtt << TCP_RTT_SHIFT; tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); tp->t_rttbest = tp->t_srtt + tp->t_rttvar; } tp->t_rtttime = 0; tp->t_rxtshift = 0; /* * the retransmit should happen at rtt + 4 * rttvar. * Because of the way we do the smoothing, srtt and rttvar * will each average +1/2 tick of bias. When we compute * the retransmit timer, we want 1/2 tick of rounding and * 1 extra tick because of +-1/2 tick uncertainty in the * firing of the timer. The bias will give us exactly the * 1.5 tick we need. But, because the bias is * statistical, we have to test that we don't drop below * the minimum feasible timer (which is 2 ticks). */ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX); /* * We received an ack for a packet that wasn't retransmitted; * it is probably safe to discard any error indications we've * received recently. This isn't quite right, but close enough * for now (a route might have failed after we sent a segment, * and the return path might not be symmetrical). */ tp->t_softerror = 0; } /* * Determine a reasonable value for maxseg size. * If the route is known, check route for mtu. * If none, use an mss that can be handled on the outgoing interface * without forcing IP to fragment. If no route is found, route has no mtu, * or the destination isn't local, use a default, hopefully conservative * size (usually 512 or the default IP max size, but no more than the mtu * of the interface), as we can't discover anything about intervening * gateways or networks. We also initialize the congestion/slow start * window to be a single segment if the destination isn't local. * While looking at the routing entry, we also initialize other path-dependent * parameters from pre-set or cached values in the routing entry. * * Also take into account the space needed for options that we * send regularly. 
Make maxseg shorter by that amount to assure
 * that we can send maxseg amount of data even when the options
 * are present.  Store the upper limit of the length of options plus
 * data in maxopd.
 *
 * NOTE that this routine is only called when we process an incoming
 * segment, or an ICMP need fragmentation datagram.  Outgoing SYN/ACK MSS
 * settings are handled in tcp_mssopt().
 *
 * Parameters:
 *	tp		connection whose t_maxopd and t_maxseg are set.
 *	offer		MSS offered by the peer; 0 means the SYN carried no
 *			MSS option, -1 means no SYN has been received yet.
 *	mtuoffer	when not -1, offer must be -1 and the offer is
 *			derived from it as mtuoffer - min_protoh.
 *	metricptr	if non-NULL, receives a copy of the hostcache
 *			metrics (zeroed when no MTU is known and the
 *			function returns early).
 *	cap		passed through to tcp_maxmtu()/tcp_maxmtu6().
 */
void
tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
    struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
{
	int mss = 0;
	u_long maxmtu = 0;
	struct inpcb *inp = tp->t_inpcb;
	struct hc_metrics_lite metrics;
	int origoffer;
	/* min_protoh: size of the IP + TCP headers for this address family. */
#ifdef INET6
	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
	size_t min_protoh = isipv6 ?
			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
			    sizeof (struct tcpiphdr);
#else
	const size_t min_protoh = sizeof(struct tcpiphdr);
#endif

	INP_WLOCK_ASSERT(tp->t_inpcb);

	/* An MTU-based offer replaces, and is exclusive with, an MSS offer. */
	if (mtuoffer != -1) {
		KASSERT(offer == -1, ("%s: conflict", __func__));
		offer = mtuoffer - min_protoh;
	}
	/* Remember the offer before it is adjusted below. */
	origoffer = offer;

	/* Initialize. */
#ifdef INET6
	if (isipv6) {
		maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
	}
#endif
#if defined(INET) && defined(INET6)
	else
#endif
#ifdef INET
	{
		maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
	}
#endif

	/*
	 * No route to sender, stay with default mss and return.
	 */
	if (maxmtu == 0) {
		/*
		 * In case we return early we need to initialize metrics
		 * to a defined state as tcp_hc_get() would do for us
		 * if there was no cache hit.
		 */
		if (metricptr != NULL)
			bzero(metricptr, sizeof(struct hc_metrics_lite));
		return;
	}

	/* What have we got? */
	switch (offer) {
		case 0:
			/*
			 * Offer == 0 means that there was no MSS on the SYN
			 * segment, in this case we use tcp_mssdflt as
			 * already assigned to t_maxopd above.
			 */
			offer = tp->t_maxopd;
			break;

		case -1:
			/*
			 * Offer == -1 means that we didn't receive SYN yet.
			 */
			/* FALLTHROUGH */

		default:
			/*
			 * Prevent DoS attack with too small MSS. Round up
			 * to at least minmss.
			 */
			offer = max(offer, V_tcp_minmss);
	}

	/*
	 * rmx information is now retrieved from tcp_hostcache.
	 */
	tcp_hc_get(&inp->inp_inc, &metrics);
	if (metricptr != NULL)
		bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));

	/*
	 * If there's a discovered mtu in tcp hostcache, use it.
	 * Else, use the link mtu.
	 */
	if (metrics.rmx_mtu)
		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
	else {
#ifdef INET6
		if (isipv6) {
			mss = maxmtu - min_protoh;
			if (!V_path_mtu_discovery &&
			    !in6_localaddr(&inp->in6p_faddr))
				mss = min(mss, V_tcp_v6mssdflt);
		}
#endif
#if defined(INET) && defined(INET6)
		else
#endif
#ifdef INET
		{
			mss = maxmtu - min_protoh;
			if (!V_path_mtu_discovery &&
			    !in_localaddr(inp->inp_faddr))
				mss = min(mss, V_tcp_mssdflt);
		}
#endif
		/*
		 * XXX - The above conditional (mss = maxmtu - min_protoh)
		 * probably violates the TCP spec.
		 * The problem is that, since we don't know the
		 * other end's MSS, we are supposed to use a conservative
		 * default.  But, if we do that, then MTU discovery will
		 * never actually take place, because the conservative
		 * default is much less than the MTUs typically seen
		 * on the Internet today.  For the moment, we'll sweep
		 * this under the carpet.
		 *
		 * The conservative default might not actually be a problem
		 * if the only case this occurs is when sending an initial
		 * SYN with options and data to a host we've never talked
		 * to before.  Then, they will reply with an MSS value which
		 * will get recorded and the new parameters should get
		 * recomputed.  For Further Study.
		 */
	}
	/* Never exceed what the peer (or the MTU-derived offer) allows. */
	mss = min(mss, offer);

	/*
	 * Sanity check: make sure that maxopd will be large
	 * enough to allow some data on segments even if
	 * all the option space is used (40 bytes).  Otherwise
	 * funny things may happen in tcp_output.
	 */
	mss = max(mss, 64);

	/*
	 * maxopd stores the maximum length of data AND options
	 * in a segment; maxseg is the amount of data in a normal
	 * segment.  We need to store this value (maxopd) apart
	 * from maxseg, because now every segment carries options
	 * and thus we normally have somewhat less data in segments.
	 */
	tp->t_maxopd = mss;

	/*
	 * origoffer==-1 indicates that no segments were received yet.
	 * In this case we just guess.
	 *
	 * Timestamp space is reserved only when we requested timestamps
	 * with options enabled, and either nothing was received yet or
	 * the peer sent timestamps too.
	 */
	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
	    (origoffer == -1 ||
	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
		mss -= TCPOLEN_TSTAMP_APPA;

	tp->t_maxseg = mss;
}

/*
 * Set up the MSS for a connection and size its socket buffers to match.
 * Calls tcp_mss_update() with no MTU offer (mtuoffer == -1), then adjusts
 * the send and receive socket buffers relative to the resulting mss and,
 * when the interface capabilities include CSUM_TSO, records the TSO
 * limits in the control block.
 */
void
tcp_mss(struct tcpcb *tp, int offer)
{
	int mss;
	u_long bufsize;
	struct inpcb *inp;
	struct socket *so;
	struct hc_metrics_lite metrics;
	struct tcp_ifcap cap;

	KASSERT(tp != NULL, ("%s: tp == NULL", __func__));

	/* Start from empty capabilities; tcp_mss_update() is handed &cap. */
	bzero(&cap, sizeof(cap));
	tcp_mss_update(tp, offer, -1, &metrics, &cap);

	mss = tp->t_maxseg;
	inp = tp->t_inpcb;

	/*
	 * If there's a pipesize, change the socket buffer to that size,
	 * don't change if sb_hiwat is different than default (then it
	 * has been changed on purpose with setsockopt).
	 * Make the socket buffers an integral number of mss units;
	 * if the mss is larger than the socket buffer, decrease the mss.
	 */
	so = inp->inp_socket;
	SOCKBUF_LOCK(&so->so_snd);
	if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
		bufsize = metrics.rmx_sendpipe;
	else
		bufsize = so->so_snd.sb_hiwat;
	if (bufsize < mss)
		mss = bufsize;
	else {
		bufsize = roundup(bufsize, mss);
		if (bufsize > sb_max)
			bufsize = sb_max;
		/* Only ever grow the reservation here. */
		if (bufsize > so->so_snd.sb_hiwat)
			(void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
	}
	SOCKBUF_UNLOCK(&so->so_snd);
	/* Store the mss, possibly lowered to the send buffer size above. */
	tp->t_maxseg = mss;

	/* Same treatment for the receive buffer; the mss is not changed here. */
	SOCKBUF_LOCK(&so->so_rcv);
	if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
		bufsize = metrics.rmx_recvpipe;
	else
		bufsize = so->so_rcv.sb_hiwat;
	if (bufsize > mss) {
		bufsize = roundup(bufsize, mss);
		if (bufsize > sb_max)
			bufsize = sb_max;
		if (bufsize > so->so_rcv.sb_hiwat)
			(void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
	}
	SOCKBUF_UNLOCK(&so->so_rcv);

	/* Check the interface for TSO capabilities. */
	if (cap.ifcap & CSUM_TSO) {
		tp->t_flags |= TF_TSO;
		tp->t_tsomax = cap.tsomax;
		tp->t_tsomaxsegcount = cap.tsomaxsegcount;
		tp->t_tsomaxsegsize = cap.tsomaxsegsize;
	}
}

/*
 * Determine the MSS option to send on an outgoing SYN.
 *
 * Starts from the per-family default MSS.  If both the interface MTU and
 * a hostcache MTU are known, the smaller of the two minus the protocol
 * header size is returned; if only one is known, that one minus the
 * header size; if neither is known, the default is returned unchanged.
 */
int
tcp_mssopt(struct in_conninfo *inc)
{
	int mss = 0;
	u_long maxmtu = 0;
	u_long thcmtu = 0;
	size_t min_protoh;

	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));

#ifdef INET6
	if (inc->inc_flags & INC_ISIPV6) {
		mss = V_tcp_v6mssdflt;
		maxmtu = tcp_maxmtu6(inc, NULL);
		min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	}
#endif
#if defined(INET) && defined(INET6)
	else
#endif
#ifdef INET
	{
		mss = V_tcp_mssdflt;
		maxmtu = tcp_maxmtu(inc, NULL);
		min_protoh = sizeof(struct tcpiphdr);
	}
#endif
#if defined(INET6) || defined(INET)
	thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
#endif

	if (maxmtu && thcmtu)
		mss = min(maxmtu, thcmtu) - min_protoh;
	else if (maxmtu || thcmtu)
		/* One of the two is zero, so max() picks the known one. */
		mss = max(maxmtu, thcmtu) - min_protoh;

	return (mss);
}


/*
 * When a partial ack arrives, force the retransmission of the
 * next unacknowledged segment.  Do not clear tp->t_dupacks.
 * By setting snd_nxt to th_ack, this forces retransmission timer to
 * be started again.
 *
 * snd_cwnd is narrowed only for the duration of the tcp_output() call
 * and restored afterwards; snd_nxt is likewise put back if it was
 * further ahead before the call.
 */
static void
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
	tcp_seq onxt = tp->snd_nxt;
	u_long ocwnd = tp->snd_cwnd;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	tcp_timer_activate(tp, TT_REXMT, 0);
	tp->t_rtttime = 0;
	tp->snd_nxt = th->th_ack;
	/*
	 * Set snd_cwnd to one segment beyond acknowledged offset.
	 * (tp->snd_una has not yet been updated when this function is called.)
	 */
	tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
	tp->t_flags |= TF_ACKNOW;
	(void) tcp_output(tp);
	tp->snd_cwnd = ocwnd;
	if (SEQ_GT(onxt, tp->snd_nxt))
		tp->snd_nxt = onxt;
	/*
	 * Partial window deflation.  Relies on fact that tp->snd_una
	 * not updated yet.
*/ if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); else tp->snd_cwnd = 0; tp->snd_cwnd += tp->t_maxseg; } Index: projects/clang370-import/sys/powerpc/powerpc/swtch32.S =================================================================== --- projects/clang370-import/sys/powerpc/powerpc/swtch32.S (revision 288925) +++ projects/clang370-import/sys/powerpc/powerpc/swtch32.S (revision 288926) @@ -1,205 +1,207 @@ /* $FreeBSD$ */ /* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "assym.s" #include "opt_sched.h" #include #include #include #include #include /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) mr %r2, %r4 li %r14,0 /* Tell cpu_switchin not to release a thread */ b cpu_switchin /* * void cpu_switch(struct thread *old, * struct thread *new, * struct mutex *mtx); * * Switch to a new thread saving the current state in the old thread. 
*/ ENTRY(cpu_switch) lwz %r6,TD_PCB(%r3) /* Get the old thread's PCB ptr */ stmw %r12,PCB_CONTEXT(%r6) /* Save the non-volatile GP regs. These can now be used for scratch */ mfcr %r16 /* Save the condition register */ stw %r16,PCB_CR(%r6) mflr %r16 /* Save the link register */ stw %r16,PCB_LR(%r6) stw %r1,PCB_SP(%r6) /* Save the stack pointer */ mr %r14,%r3 /* Copy the old thread ptr... */ mr %r2,%r4 /* and the new thread ptr in curthread */ mr %r16,%r5 /* and the new lock */ mr %r17,%r6 /* and the PCB */ lwz %r7,PCB_FLAGS(%r17) /* Save FPU context if needed */ andi. %r7, %r7, PCB_FPU beq .L1 bl save_fpu .L1: mr %r3,%r14 /* restore old thread ptr */ lwz %r7,PCB_FLAGS(%r17) /* Save Altivec context if needed */ andi. %r7, %r7, PCB_VEC beq .L2 bl save_vec .L2: mr %r3,%r14 /* restore old thread ptr */ bl pmap_deactivate /* Deactivate the current pmap */ sync /* Make sure all of that finished */ cpu_switchin: #if defined(SMP) && defined(SCHED_ULE) /* Wait for the new thread to become unblocked */ bl _GLOBAL_OFFSET_TABLE_@local-4 mflr %r6 lwz %r6,blocked_lock@got(%r6) blocked_loop: lwz %r7,TD_LOCK(%r2) cmpw %r6,%r7 beq- blocked_loop isync #endif lwz %r17,TD_PCB(%r2) /* Get new current PCB */ lwz %r1,PCB_SP(%r17) /* Load new stack pointer */ /* Release old thread now that we have a stack pointer set up */ cmpwi %r14,0 beq- 1f stw %r16,TD_LOCK(%r14) /* ULE: update old thread's lock */ 1: mfsprg %r7,0 /* Get the pcpu pointer */ stw %r2,PC_CURTHREAD(%r7) /* Store new current thread */ lwz %r17,TD_PCB(%r2) /* Store new current PCB */ stw %r17,PC_CURPCB(%r7) mr %r3,%r2 /* Get new thread ptr */ bl pmap_activate /* Activate the new address space */ lwz %r6, PCB_FLAGS(%r17) /* Restore FPU context if needed */ andi. %r6, %r6, PCB_FPU beq .L3 mr %r3,%r2 /* Pass curthread to enable_fpu */ bl enable_fpu .L3: lwz %r6, PCB_FLAGS(%r17) /* Restore Altivec context if needed */ andi. 
%r6, %r6, PCB_VEC beq .L4 mr %r3,%r2 /* Pass curthread to enable_vec */ bl enable_vec .L4: /* thread to restore is in r3 */ mr %r3,%r17 /* Recover PCB ptr */ lmw %r12,PCB_CONTEXT(%r3) /* Load the non-volatile GP regs */ lwz %r5,PCB_CR(%r3) /* Load the condition register */ mtcr %r5 lwz %r5,PCB_LR(%r3) /* Load the link register */ mtlr %r5 lwz %r1,PCB_SP(%r3) /* Load the stack pointer */ /* * Perform a dummy stwcx. to clear any reservations we may have * inherited from the previous thread. It doesn't matter if the * stwcx succeeds or not. pcb_context[0] can be clobbered. */ stwcx. %r1, 0, %r3 blr /* * savectx(pcb) * Update pcb, saving current processor state */ ENTRY(savectx) stmw %r12,PCB_CONTEXT(%r3) /* Save the non-volatile GP regs */ mfcr %r4 /* Save the condition register */ stw %r4,PCB_CR(%r3) + mflr %r4 /* Save the link register */ + stw %r4,PCB_LR(%r3) blr /* * fork_trampoline() * Set up the return from cpu_fork() */ ENTRY(fork_trampoline) lwz %r3,CF_FUNC(%r1) lwz %r4,CF_ARG0(%r1) lwz %r5,CF_ARG1(%r1) bl fork_exit addi %r1,%r1,CF_SIZE-FSP /* Allow 8 bytes in front of trapframe to simulate FRAME_SETUP does when allocating space for a frame pointer/saved LR */ b trapexit Index: projects/clang370-import/sys/powerpc/powerpc/swtch64.S =================================================================== --- projects/clang370-import/sys/powerpc/powerpc/swtch64.S (revision 288925) +++ projects/clang370-import/sys/powerpc/powerpc/swtch64.S (revision 288926) @@ -1,278 +1,280 @@ /* $FreeBSD$ */ /* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "assym.s" #include "opt_sched.h" #include #include #include #include TOC_ENTRY(blocked_lock) /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) mr %r13, %r4 li %r14,0 /* Tell cpu_switchin not to release a thread */ b cpu_switchin /* * void cpu_switch(struct thread *old, * struct thread *new, * struct mutex *mtx); * * Switch to a new thread saving the current state in the old thread. */ ENTRY(cpu_switch) ld %r6,TD_PCB(%r3) /* Get the old thread's PCB ptr */ std %r12,PCB_CONTEXT(%r6) /* Save the non-volatile GP regs. 
These can now be used for scratch */ std %r14,PCB_CONTEXT+2*8(%r6) std %r15,PCB_CONTEXT+3*8(%r6) std %r16,PCB_CONTEXT+4*8(%r6) std %r17,PCB_CONTEXT+5*8(%r6) std %r18,PCB_CONTEXT+6*8(%r6) std %r19,PCB_CONTEXT+7*8(%r6) std %r20,PCB_CONTEXT+8*8(%r6) std %r21,PCB_CONTEXT+9*8(%r6) std %r22,PCB_CONTEXT+10*8(%r6) std %r23,PCB_CONTEXT+11*8(%r6) std %r24,PCB_CONTEXT+12*8(%r6) std %r25,PCB_CONTEXT+13*8(%r6) std %r26,PCB_CONTEXT+14*8(%r6) std %r27,PCB_CONTEXT+15*8(%r6) std %r28,PCB_CONTEXT+16*8(%r6) std %r29,PCB_CONTEXT+17*8(%r6) std %r30,PCB_CONTEXT+18*8(%r6) std %r31,PCB_CONTEXT+19*8(%r6) mfcr %r16 /* Save the condition register */ std %r16,PCB_CR(%r6) mflr %r16 /* Save the link register */ std %r16,PCB_LR(%r6) std %r1,PCB_SP(%r6) /* Save the stack pointer */ std %r2,PCB_TOC(%r6) /* Save the TOC pointer */ mr %r14,%r3 /* Copy the old thread ptr... */ mr %r13,%r4 /* and the new thread ptr in curthread*/ mr %r16,%r5 /* and the new lock */ mr %r17,%r6 /* and the PCB */ stdu %r1,-48(%r1) lwz %r7,PCB_FLAGS(%r17) /* Save FPU context if needed */ andi. %r7, %r7, PCB_FPU beq .L1 bl save_fpu nop .L1: mr %r3,%r14 /* restore old thread ptr */ lwz %r7,PCB_FLAGS(%r17) /* Save Altivec context if needed */ andi. 
%r7, %r7, PCB_VEC beq .L2 bl save_vec nop .L2: mr %r3,%r14 /* restore old thread ptr */ bl pmap_deactivate /* Deactivate the current pmap */ nop sync /* Make sure all of that finished */ cpu_switchin: #if defined(SMP) && defined(SCHED_ULE) /* Wait for the new thread to become unblocked */ ld %r6,TOC_REF(blocked_lock)(%r2) blocked_loop: ld %r7,TD_LOCK(%r13) cmpd %r6,%r7 beq- blocked_loop isync #endif ld %r17,TD_PCB(%r13) /* Get new PCB */ ld %r1,PCB_SP(%r17) /* Load the stack pointer */ /* Release old thread now that we have a stack pointer set up */ cmpdi %r14,0 beq- 1f std %r16,TD_LOCK(%r14) /* ULE: update old thread's lock */ 1: mfsprg %r7,0 /* Get the pcpu pointer */ std %r13,PC_CURTHREAD(%r7) /* Store new current thread */ ld %r17,TD_PCB(%r13) /* Store new current PCB */ std %r17,PC_CURPCB(%r7) mr %r3,%r13 /* Get new thread ptr */ bl pmap_activate /* Activate the new address space */ nop lwz %r6, PCB_FLAGS(%r17) /* Restore FPU context if needed */ andi. %r6, %r6, PCB_FPU beq .L3 mr %r3,%r13 /* Pass curthread to enable_fpu */ bl enable_fpu nop .L3: lwz %r6, PCB_FLAGS(%r17) /* Restore Altivec context if needed */ andi. %r6, %r6, PCB_VEC beq .L4 mr %r3,%r13 /* Pass curthread to enable_vec */ bl enable_vec nop /* thread to restore is in r3 */ .L4: addi %r1,%r1,48 mr %r3,%r17 /* Recover PCB ptr */ ld %r12,PCB_CONTEXT(%r3) /* Load the non-volatile GP regs. 
*/ ld %r14,PCB_CONTEXT+2*8(%r3) ld %r15,PCB_CONTEXT+3*8(%r3) ld %r16,PCB_CONTEXT+4*8(%r3) ld %r17,PCB_CONTEXT+5*8(%r3) ld %r18,PCB_CONTEXT+6*8(%r3) ld %r19,PCB_CONTEXT+7*8(%r3) ld %r20,PCB_CONTEXT+8*8(%r3) ld %r21,PCB_CONTEXT+9*8(%r3) ld %r22,PCB_CONTEXT+10*8(%r3) ld %r23,PCB_CONTEXT+11*8(%r3) ld %r24,PCB_CONTEXT+12*8(%r3) ld %r25,PCB_CONTEXT+13*8(%r3) ld %r26,PCB_CONTEXT+14*8(%r3) ld %r27,PCB_CONTEXT+15*8(%r3) ld %r28,PCB_CONTEXT+16*8(%r3) ld %r29,PCB_CONTEXT+17*8(%r3) ld %r30,PCB_CONTEXT+18*8(%r3) ld %r31,PCB_CONTEXT+19*8(%r3) ld %r5,PCB_CR(%r3) /* Load the condition register */ mtcr %r5 ld %r5,PCB_LR(%r3) /* Load the link register */ mtlr %r5 ld %r1,PCB_SP(%r3) /* Load the stack pointer */ ld %r2,PCB_TOC(%r3) /* Load the TOC pointer */ /* * Perform a dummy stdcx. to clear any reservations we may have * inherited from the previous thread. It doesn't matter if the * stdcx succeeds or not. pcb_context[0] can be clobbered. */ stdcx. %r1, 0, %r3 blr /* * savectx(pcb) * Update pcb, saving current processor state */ ENTRY(savectx) std %r12,PCB_CONTEXT(%r3) /* Save the non-volatile GP regs. 
*/ std %r13,PCB_CONTEXT+1*8(%r3) std %r14,PCB_CONTEXT+2*8(%r3) std %r15,PCB_CONTEXT+3*8(%r3) std %r16,PCB_CONTEXT+4*8(%r3) std %r17,PCB_CONTEXT+5*8(%r3) std %r18,PCB_CONTEXT+6*8(%r3) std %r19,PCB_CONTEXT+7*8(%r3) std %r20,PCB_CONTEXT+8*8(%r3) std %r21,PCB_CONTEXT+9*8(%r3) std %r22,PCB_CONTEXT+10*8(%r3) std %r23,PCB_CONTEXT+11*8(%r3) std %r24,PCB_CONTEXT+12*8(%r3) std %r25,PCB_CONTEXT+13*8(%r3) std %r26,PCB_CONTEXT+14*8(%r3) std %r27,PCB_CONTEXT+15*8(%r3) std %r28,PCB_CONTEXT+16*8(%r3) std %r29,PCB_CONTEXT+17*8(%r3) std %r30,PCB_CONTEXT+18*8(%r3) std %r31,PCB_CONTEXT+19*8(%r3) mfcr %r4 /* Save the condition register */ std %r4,PCB_CR(%r3) std %r2,PCB_TOC(%r3) /* Save the TOC pointer */ + mflr %r4 /* Save the link register */ + std %r4,PCB_LR(%r3) blr /* * fork_trampoline() * Set up the return from cpu_fork() */ ENTRY_NOPROF(fork_trampoline) ld %r3,CF_FUNC(%r1) ld %r4,CF_ARG0(%r1) ld %r5,CF_ARG1(%r1) stdu %r1,-48(%r1) bl fork_exit nop addi %r1,%r1,48+CF_SIZE-FSP /* Allow 8 bytes in front of trapframe to simulate FRAME_SETUP does when allocating space for a frame pointer/saved LR */ b trapexit nop Index: projects/clang370-import/sys/vm/swap_pager.c =================================================================== --- projects/clang370-import/sys/vm/swap_pager.c (revision 288925) +++ projects/clang370-import/sys/vm/swap_pager.c (revision 288926) @@ -1,2862 +1,2860 @@ /*- * Copyright (c) 1998 Matthew Dillon, * Copyright (c) 1994 John S. Dyson * Copyright (c) 1990 University of Utah. * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * New Swap System * Matthew Dillon * * Radix Bitmap 'blists'. * * - The new swapper uses the new radix bitmap code. This should scale * to arbitrarily small or arbitrarily large swap spaces and an almost * arbitrary degree of fragmentation. * * Features: * * - on the fly reallocation of swap during putpages. The new system * does not try to keep previously allocated swap blocks for dirty * pages. 
* * - on the fly deallocation of swap * * - No more garbage collection required. Unnecessarily allocated swap * blocks only exist for dirty vm_page_t's now and these are already * cycled (in a high-load system) by the pager. We also do on-the-fly * removal of invalidated swap blocks when a page is destroyed * or renamed. * * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_swap.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * SWB_NPAGES must be a power of 2. It may be set to 1, 2, 4, 8, 16 * or 32 pages per allocation. * The 32-page limit is due to the radix code (kern/subr_blist.c). */ #ifndef MAX_PAGEOUT_CLUSTER #define MAX_PAGEOUT_CLUSTER 16 #endif #if !defined(SWB_NPAGES) #define SWB_NPAGES MAX_PAGEOUT_CLUSTER #endif /* * The swblock structure maps an object and a small, fixed-size range * of page indices to disk addresses within a swap area. * The collection of these mappings is implemented as a hash table. * Unused disk addresses within a swap area are allocated and managed * using a blist. 
*/ #define SWCORRECT(n) (sizeof(void *) * (n) / sizeof(daddr_t)) #define SWAP_META_PAGES (SWB_NPAGES * 2) #define SWAP_META_MASK (SWAP_META_PAGES - 1) struct swblock { struct swblock *swb_hnext; vm_object_t swb_object; vm_pindex_t swb_index; int swb_count; daddr_t swb_pages[SWAP_META_PAGES]; }; static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data"); static struct mtx sw_dev_mtx; static TAILQ_HEAD(, swdevt) swtailq = TAILQ_HEAD_INITIALIZER(swtailq); static struct swdevt *swdevhd; /* Allocate from here next */ static int nswapdev; /* Number of swap devices */ int swap_pager_avail; static int swdev_syscall_active = 0; /* serialize swap(on|off) */ static vm_ooffset_t swap_total; SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0, "Total amount of available swap storage."); static vm_ooffset_t swap_reserved; SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0, "Amount of swap storage needed to back all allocated anonymous memory."); static int overcommit = 0; SYSCTL_INT(_vm, OID_AUTO, overcommit, CTLFLAG_RW, &overcommit, 0, "Configure virtual memory overcommit behavior. 
See tuning(7) " "for details."); static unsigned long swzone; SYSCTL_ULONG(_vm, OID_AUTO, swzone, CTLFLAG_RD, &swzone, 0, "Actual size of swap metadata zone"); static unsigned long swap_maxpages; SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0, "Maximum amount of swap supported"); /* bits from overcommit */ #define SWAP_RESERVE_FORCE_ON (1 << 0) #define SWAP_RESERVE_RLIMIT_ON (1 << 1) #define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2) int swap_reserve(vm_ooffset_t incr) { return (swap_reserve_by_cred(incr, curthread->td_ucred)); } int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred) { vm_ooffset_t r, s; int res, error; static int curfail; static struct timeval lastfail; struct uidinfo *uip; uip = cred->cr_ruidinfo; if (incr & PAGE_MASK) panic("swap_reserve: & PAGE_MASK"); #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); error = racct_add(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); if (error != 0) return (0); } #endif res = 0; mtx_lock(&sw_dev_mtx); r = swap_reserved + incr; if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) { s = vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_cnt.v_wire_count; s *= PAGE_SIZE; } else s = 0; s += swap_total; if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s || (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) { res = 1; swap_reserved = r; } mtx_unlock(&sw_dev_mtx); if (res) { UIDINFO_VMSIZE_LOCK(uip); if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 && uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) && priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) res = 0; else uip->ui_vmsize += incr; UIDINFO_VMSIZE_UNLOCK(uip); if (!res) { mtx_lock(&sw_dev_mtx); swap_reserved -= incr; mtx_unlock(&sw_dev_mtx); } } if (!res && ppsratecheck(&lastfail, &curfail, 1)) { printf("uid %d, pid %d: swap reservation for %jd bytes failed\n", uip->ui_uid, curproc->p_pid, incr); } #ifdef RACCT if (!res) { PROC_LOCK(curproc); racct_sub(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); } #endif return 
(res); } void swap_reserve_force(vm_ooffset_t incr) { struct uidinfo *uip; mtx_lock(&sw_dev_mtx); swap_reserved += incr; mtx_unlock(&sw_dev_mtx); #ifdef RACCT PROC_LOCK(curproc); racct_add_force(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); #endif uip = curthread->td_ucred->cr_ruidinfo; PROC_LOCK(curproc); UIDINFO_VMSIZE_LOCK(uip); uip->ui_vmsize += incr; UIDINFO_VMSIZE_UNLOCK(uip); PROC_UNLOCK(curproc); } void swap_release(vm_ooffset_t decr) { struct ucred *cred; PROC_LOCK(curproc); cred = curthread->td_ucred; swap_release_by_cred(decr, cred); PROC_UNLOCK(curproc); } void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) { struct uidinfo *uip; uip = cred->cr_ruidinfo; if (decr & PAGE_MASK) panic("swap_release: & PAGE_MASK"); mtx_lock(&sw_dev_mtx); if (swap_reserved < decr) panic("swap_reserved < decr"); swap_reserved -= decr; mtx_unlock(&sw_dev_mtx); UIDINFO_VMSIZE_LOCK(uip); if (uip->ui_vmsize < decr) printf("negative vmsize for uid = %d\n", uip->ui_uid); uip->ui_vmsize -= decr; UIDINFO_VMSIZE_UNLOCK(uip); racct_sub_cred(cred, RACCT_SWAP, decr); } -static void swapdev_strategy(struct buf *, struct swdevt *sw); - #define SWM_FREE 0x02 /* free, period */ #define SWM_POP 0x04 /* pop out */ int swap_pager_full = 2; /* swap space exhaustion (task killing) */ static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ static int nsw_rcount; /* free read buffers */ static int nsw_wcount_sync; /* limit write buffers / synchronous */ static int nsw_wcount_async; /* limit write buffers / asynchronous */ static int nsw_wcount_async_max;/* assigned maximum */ static int nsw_cluster_max; /* maximum VOP I/O allowed */ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_swap_async_max, "I", "Maximum running async swap ops"); static struct swblock **swhash; static int swhash_mask; static struct mtx swhash_mtx; static struct sx sw_alloc_sx; /* * "named" and 
"unnamed" anon region objects. Try to reduce the overhead * of searching a named list by hashing it just a little. */ #define NOBJLISTS 8 #define NOBJLIST(handle) \ (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) static struct mtx sw_alloc_mtx; /* protect list manipulation */ static struct pagerlst swap_pager_object_list[NOBJLISTS]; static uma_zone_t swap_zone; /* * pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure * calls hooked from other parts of the VM system and do not appear here. * (see vm/swap_pager.h). */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *); static void swap_pager_dealloc(vm_object_t object); static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int); static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int, pgo_getpages_iodone_t, void *); static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); static void swap_pager_init(void); static void swap_pager_unswapped(vm_page_t); static void swap_pager_swapoff(struct swdevt *sp); struct pagerops swappagerops = { .pgo_init = swap_pager_init, /* early system initialization of pager */ .pgo_alloc = swap_pager_alloc, /* allocate an OBJT_SWAP object */ .pgo_dealloc = swap_pager_dealloc, /* deallocate an OBJT_SWAP object */ .pgo_getpages = swap_pager_getpages, /* pagein */ .pgo_getpages_async = swap_pager_getpages_async, /* pagein (async) */ .pgo_putpages = swap_pager_putpages, /* pageout */ .pgo_haspage = swap_pager_haspage, /* get backing store status for page */ .pgo_pageunswapped = swap_pager_unswapped, /* remove swap related to page */ }; /* * dmmax is in page-sized chunks with the new swap system. It was * dev-bsized chunks in the old. dmmax is always a power of 2. * * swap_*() routines are externally accessible. 
swp_*() routines are * internal. */ static int dmmax; static int nswap_lowat = 128; /* in pages, swap_pager_almost_full is set below this */ static int nswap_hiwat = 512; /* in pages, swap_pager_almost_full is cleared above this */ SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &dmmax, 0, "Maximum size of a swap block"); static void swp_sizecheck(void); static void swp_pager_async_iodone(struct buf *bp); static int swapongeom(struct thread *, struct vnode *); static int swaponvp(struct thread *, struct vnode *, u_long); static int swapoff_one(struct swdevt *sp, struct ucred *cred); /* * Swap bitmap functions */ static void swp_pager_freeswapspace(daddr_t blk, int npages); static daddr_t swp_pager_getswapspace(int npages); /* * Metadata functions */ static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index); static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); /* * Free page m unless it is wired: with the page lock held, call * vm_page_free() only when m->wire_count is zero. */ static void swp_pager_free_nrpage(vm_page_t m) { vm_page_lock(m); if (m->wire_count == 0) vm_page_free(m); vm_page_unlock(m); } /* * SWP_SIZECHECK() - update swap_pager_full indication * * Update the swap_pager_almost_full indication and warn when we are * about to run out of swap space, using lowat/hiwat hysteresis: the * flag is set (and the warning printed once) when swap_pager_avail * drops below nswap_lowat, and is cleared only when swap_pager_avail * rises above nswap_hiwat. * * Clear the swap_pager_full (task killing) indication whenever * swap_pager_avail is not below lowat. * * No restrictions on call * This routine may not block. */ static void swp_sizecheck(void) { if (swap_pager_avail < nswap_lowat) { if (swap_pager_almost_full == 0) { printf("swap_pager: out of swap space\n"); swap_pager_almost_full = 1; } } else { swap_pager_full = 0; if (swap_pager_avail > nswap_hiwat) swap_pager_almost_full = 0; } } /* * SWP_PAGER_HASH() - hash swap meta data * * This is a helper function which hashes the swapblk given * the object and page index.
It returns a pointer to the hash chain link * that refers to the matching swblock, or a pointer to the NULL link at * the end of the chain if it could not find a swapblk. */ static struct swblock ** swp_pager_hash(vm_object_t object, vm_pindex_t index) { struct swblock **pswap; struct swblock *swap; /* Clear the SWAP_META_MASK bits so index names the start of its group. */ index &= ~(vm_pindex_t)SWAP_META_MASK; pswap = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask]; /* Walk the bucket's chain through swb_hnext until a match or NULL. */ while ((swap = *pswap) != NULL) { if (swap->swb_object == object && swap->swb_index == index ) { break; } pswap = &swap->swb_hnext; } return (pswap); } /* * SWAP_PAGER_INIT() - initialize the swap pager! * * Expected to be started from system init. NOTE: This code is run * before much else so be careful what you depend on. Most of the VM * system has yet to be initialized at this point. * * NOTE(review): sw_alloc_mtx and sw_dev_mtx are initialized here, but * sw_alloc_sx (taken in swap_pager_alloc()) is not; confirm that it is * initialized elsewhere. */ static void swap_pager_init(void) { /* * Initialize object lists */ int i; for (i = 0; i < NOBJLISTS; ++i) TAILQ_INIT(&swap_pager_object_list[i]); mtx_init(&sw_alloc_mtx, "swap_pager list", NULL, MTX_DEF); mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); /* * Device Stripe, in PAGE_SIZE'd blocks */ dmmax = SWB_NPAGES * 2; } /* * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process * * Expected to be started from pageout process once, prior to entering * its main loop. */ void swap_pager_swap_init(void) { unsigned long n, n2; /* * Number of in-transit swap bp operations. Don't * exhaust the pbufs completely. Make sure we * initialize workable values (0 will work for hysteresis * but it isn't very efficient). * * The nsw_cluster_max is constrained by the bp->b_pages[] * array (MAXPHYS/PAGE_SIZE) and our locally defined * MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are * constrained by the swap device interleave stripe size. * * Currently we hardwire nsw_wcount_async to 4. This limit is * designed to prevent other I/O from having high latencies due to * our pageout I/O. The value 4 works well for one or two active swap * devices but is probably a little low if you have more.
Even so, * a higher value would probably generate only a limited improvement * with three or four active swap devices since the system does not * typically have to pageout at extreme bandwidths. We will want * at least 2 per swap devices, and 4 is a pretty good value if you * have one NFS swap device due to the command/ack latency over NFS. * So it all works out pretty well. */ nsw_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER); mtx_lock(&pbuf_mtx); nsw_rcount = (nswbuf + 1) / 2; nsw_wcount_sync = (nswbuf + 3) / 4; nsw_wcount_async = 4; nsw_wcount_async_max = nsw_wcount_async; mtx_unlock(&pbuf_mtx); /* * Initialize our zone. Right now I'm just guessing on the number * we need based on the number of pages in the system. Each swblock * can hold 32 pages, so this is probably overkill. This reservation * is typically limited to around 32MB by default. */ n = vm_cnt.v_page_count / 2; if (maxswzone && n > maxswzone / sizeof(struct swblock)) n = maxswzone / sizeof(struct swblock); n2 = n; swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); if (swap_zone == NULL) panic("failed to create swap_zone."); do { if (uma_zone_reserve_kva(swap_zone, n)) break; /* * if the allocation failed, try a zone two thirds the * size of the previous attempt. */ n -= ((n + 2) / 3); } while (n > 0); if (n2 != n) printf("Swap zone entries reduced from %lu to %lu.\n", n2, n); swap_maxpages = n * SWAP_META_PAGES; swzone = n * sizeof(struct swblock); n2 = n; /* * Initialize our meta-data hash table. The swapper does not need to * be quite as efficient as the VM system, so we do not use an * oversized hash table. 
* * n: size of hash table, must be power of 2 * swhash_mask: hash table index mask */ for (n = 1; n < n2 / 8; n *= 2) ; swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO); swhash_mask = n - 1; mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF); } /* * SWAP_PAGER_ALLOC() - allocate a new OBJT_SWAP VM object and instantiate * its metadata structures. * * This routine is called from the mmap and fork code to create a new * OBJT_SWAP object. We do this by creating an OBJT_DEFAULT object * and then converting it with swp_pager_meta_build(). * * This routine may block in vm_object_allocate() and create a named * object lookup race, so we must interlock. * * MPSAFE */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; vm_pindex_t pindex; pindex = OFF_TO_IDX(offset + PAGE_MASK + size); if (handle) { mtx_lock(&Giant); /* * Reference existing named region or allocate new one. There * should not be a race here against swp_pager_meta_build() * as called from vm_page_remove() in regards to the lookup * of the handle. 
*/ sx_xlock(&sw_alloc_sx); object = vm_pager_object_lookup(NOBJLIST(handle), handle); if (object == NULL) { if (cred != NULL) { if (!swap_reserve_by_cred(size, cred)) { sx_xunlock(&sw_alloc_sx); mtx_unlock(&Giant); return (NULL); } crhold(cred); } object = vm_object_allocate(OBJT_DEFAULT, pindex); VM_OBJECT_WLOCK(object); object->handle = handle; if (cred != NULL) { object->cred = cred; object->charge = size; } swp_pager_meta_build(object, 0, SWAPBLK_NONE); VM_OBJECT_WUNLOCK(object); } sx_xunlock(&sw_alloc_sx); mtx_unlock(&Giant); } else { if (cred != NULL) { if (!swap_reserve_by_cred(size, cred)) return (NULL); crhold(cred); } object = vm_object_allocate(OBJT_DEFAULT, pindex); VM_OBJECT_WLOCK(object); if (cred != NULL) { object->cred = cred; object->charge = size; } swp_pager_meta_build(object, 0, SWAPBLK_NONE); VM_OBJECT_WUNLOCK(object); } return (object); } /* * SWAP_PAGER_DEALLOC() - remove swap metadata from object * * The swap backing for the object is destroyed. The code is * designed such that we can reinstantiate it later, but this * routine is typically called only when the entire object is * about to be destroyed. * * The object must be locked. */ static void swap_pager_dealloc(vm_object_t object) { /* * Remove from list right away so lookups will fail if we block for * pageout completion. */ if (object->handle != NULL) { mtx_lock(&sw_alloc_mtx); TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list); mtx_unlock(&sw_alloc_mtx); } VM_OBJECT_ASSERT_WLOCKED(object); vm_object_pip_wait(object, "swpdea"); /* * Free all remaining metadata. We only bother to free it from * the swap meta data. We do not attempt to free swapblk's still * associated with vm_page_t's for this object. We do not care * if paging is still in progress on some objects. 
*/ swp_pager_meta_free_all(object); object->handle = NULL; object->type = OBJT_DEAD; } /************************************************************************ * SWAP PAGER BITMAP ROUTINES * ************************************************************************/ /* * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space * * Allocate swap for the requested number of pages. The starting * swap block number (a page index) is returned or SWAPBLK_NONE * if the allocation failed. * * Also has the side effect of advising that somebody made a mistake * when they configured swap and didn't configure enough. * * This routine may not sleep. * * We allocate in round-robin fashion from the configured devices. */ static daddr_t swp_pager_getswapspace(int npages) { daddr_t blk; struct swdevt *sp; int i; blk = SWAPBLK_NONE; mtx_lock(&sw_dev_mtx); sp = swdevhd; for (i = 0; i < nswapdev; i++) { if (sp == NULL) sp = TAILQ_FIRST(&swtailq); if (!(sp->sw_flags & SW_CLOSING)) { blk = blist_alloc(sp->sw_blist, npages); if (blk != SWAPBLK_NONE) { blk += sp->sw_first; sp->sw_used += npages; swap_pager_avail -= npages; swp_sizecheck(); swdevhd = TAILQ_NEXT(sp, sw_list); goto done; } } sp = TAILQ_NEXT(sp, sw_list); } if (swap_pager_full != 2) { printf("swap_pager_getswapspace(%d): failed\n", npages); swap_pager_full = 2; swap_pager_almost_full = 1; } swdevhd = NULL; done: mtx_unlock(&sw_dev_mtx); return (blk); } static int swp_pager_isondev(daddr_t blk, struct swdevt *sp) { return (blk >= sp->sw_first && blk < sp->sw_end); } static void swp_pager_strategy(struct buf *bp) { struct swdevt *sp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (bp->b_blkno >= sp->sw_first && bp->b_blkno < sp->sw_end) { mtx_unlock(&sw_dev_mtx); if ((sp->sw_flags & SW_UNMAPPED) != 0 && unmapped_buf_allowed) { bp->b_data = unmapped_buf; bp->b_offset = 0; } else { pmap_qenter((vm_offset_t)bp->b_data, &bp->b_pages[0], bp->b_bcount / PAGE_SIZE); } sp->sw_strategy(bp, sp); return; } } panic("Swapdev not 
found"); } /* * SWP_PAGER_FREESWAPSPACE() - free raw swap space * * This routine returns the specified swap blocks back to the bitmap. * * This routine may not sleep. */ static void swp_pager_freeswapspace(daddr_t blk, int npages) { struct swdevt *sp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (blk >= sp->sw_first && blk < sp->sw_end) { sp->sw_used -= npages; /* * If we are attempting to stop swapping on * this device, we don't want to mark any * blocks free lest they be reused. */ if ((sp->sw_flags & SW_CLOSING) == 0) { blist_free(sp->sw_blist, blk - sp->sw_first, npages); swap_pager_avail += npages; swp_sizecheck(); } mtx_unlock(&sw_dev_mtx); return; } } panic("Swapdev not found"); } /* * SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page * range within an object. * * This is a globally accessible routine. * * This routine removes swapblk assignments from swap metadata. * * The external callers of this routine typically have already destroyed * or renamed vm_page_t's associated with this range in the object so * we should be ok. * * The object must be locked. */ void swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) { swp_pager_meta_free(object, start, size); } /* * SWAP_PAGER_RESERVE() - reserve swap blocks in object * * Assigns swap blocks to the specified range within the object. The * swap blocks are not zeroed. Any previous swap assignment is destroyed. * * Returns 0 on success, -1 on failure. 
*/ int swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) { int n = 0; daddr_t blk = SWAPBLK_NONE; vm_pindex_t beg = start; /* save start index */ VM_OBJECT_WLOCK(object); while (size) { if (n == 0) { n = BLIST_MAX_ALLOC; while ((blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE) { n >>= 1; if (n == 0) { swp_pager_meta_free(object, beg, start - beg); VM_OBJECT_WUNLOCK(object); return (-1); } } } swp_pager_meta_build(object, start, blk); --size; ++start; ++blk; --n; } swp_pager_meta_free(object, start, n); VM_OBJECT_WUNLOCK(object); return (0); } /* * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager * and destroy the source. * * Copy any valid swapblks from the source to the destination. In * cases where both the source and destination have a valid swapblk, * we keep the destination's. * * This routine is allowed to sleep. It may sleep allocating metadata * indirectly through swp_pager_meta_build() or if paging is still in * progress on the source. * * The source object contains no vm_page_t's (which is just as well) * * The source object is of type OBJT_SWAP. * * The source and destination objects must be locked. * Both object locks may temporarily be released. */ void swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t offset, int destroysource) { vm_pindex_t i; VM_OBJECT_ASSERT_WLOCKED(srcobject); VM_OBJECT_ASSERT_WLOCKED(dstobject); /* * If destroysource is set, we remove the source object from the * swap_pager internal queue now. */ if (destroysource) { if (srcobject->handle != NULL) { mtx_lock(&sw_alloc_mtx); TAILQ_REMOVE( NOBJLIST(srcobject->handle), srcobject, pager_object_list ); mtx_unlock(&sw_alloc_mtx); } } /* * transfer source to destination. 
*/ for (i = 0; i < dstobject->size; ++i) { daddr_t dstaddr; /* * Locate (without changing) the swapblk on the destination, * unless it is invalid in which case free it silently, or * if the destination is a resident page, in which case the * source is thrown away. */ dstaddr = swp_pager_meta_ctl(dstobject, i, 0); if (dstaddr == SWAPBLK_NONE) { /* * Destination has no swapblk and is not resident, * copy source. */ daddr_t srcaddr; srcaddr = swp_pager_meta_ctl( srcobject, i + offset, SWM_POP ); if (srcaddr != SWAPBLK_NONE) { /* * swp_pager_meta_build() can sleep. */ vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); vm_object_pip_add(dstobject, 1); swp_pager_meta_build(dstobject, i, srcaddr); vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); vm_object_pip_wakeup(srcobject); } } else { /* * Destination has valid swapblk or it is represented * by a resident page. We destroy the sourceblock. */ swp_pager_meta_ctl(srcobject, i + offset, SWM_FREE); } } /* * Free left over swap blocks in source. * * We have to revert the type to OBJT_DEFAULT so we do not accidently * double-remove the object from the swap queues. */ if (destroysource) { swp_pager_meta_free_all(srcobject); /* * Reverting the type is not necessary, the caller is going * to destroy srcobject directly, but I'm doing it here * for consistency since we've removed the object from its * queues. */ srcobject->type = OBJT_DEFAULT; } } /* * SWAP_PAGER_HASPAGE() - determine if we have good backing store for * the requested page. * * We determine whether good backing store exists for the requested * page and return TRUE if it does, FALSE if it doesn't. * * If TRUE, we also try to determine how much valid, contiguous backing * store exists before and after the requested page within a reasonable * distance. We do not try to restrict it to the swap device stripe * (that is handled in getpages/putpages). It probably isn't worth * doing here. 
*/
static boolean_t
swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after)
{
	daddr_t base;
	int dist;

	VM_OBJECT_ASSERT_LOCKED(object);

	/*
	 * Without a swap block at the requested index there is no backing
	 * store at all; report empty runs on both sides.
	 */
	base = swp_pager_meta_ctl(object, pindex, 0);
	if (base == SWAPBLK_NONE) {
		if (before)
			*before = 0;
		if (after)
			*after = 0;
		return (FALSE);
	}

	/*
	 * Count how many pages immediately preceding pindex are backed by
	 * the swap blocks immediately preceding 'base'.  The scan stops at
	 * the start of the object or after SWB_NPAGES/2 - 1 pages.
	 */
	if (before != NULL) {
		dist = 1;
		while (dist < (SWB_NPAGES/2) && dist <= pindex &&
		    swp_pager_meta_ctl(object, pindex - dist, 0) == base - dist)
			++dist;
		*before = dist - 1;
	}

	/*
	 * Same scan in the forward direction.
	 */
	if (after != NULL) {
		dist = 1;
		while (dist < (SWB_NPAGES/2) &&
		    swp_pager_meta_ctl(object, pindex + dist, 0) == base + dist)
			++dist;
		*after = dist - 1;
	}
	return (TRUE);
}

/*
 * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page
 *
 *	Drops whatever swap block is assigned to the page, valid or not,
 *	and returns it to the swap bitmap.
 *
 *	Typically called when a page is made dirty, since its swap copy is
 *	then stale and can be released.  MADV_FREE also calls us in a
 *	special-case situation.
 *
 *	NOTE!!!  If the page is clean and the swap was valid, the caller
 *	should make the page dirty before calling this routine.  This routine
 *	does NOT change the m->dirty status of the page.  Also: MADV_FREE
 *	depends on it.
 *
 *	This routine may not sleep.
 *
 *	The object containing the page must be locked.
 */
static void
swap_pager_unswapped(vm_page_t m)
{
	vm_object_t obj = m->object;

	(void)swp_pager_meta_ctl(obj, m->pindex, SWM_FREE);
}

/*
 * SWAP_PAGER_GETPAGES() - bring pages in from swap
 *
 *	Attempt to retrieve (m, count) pages from backing store, but make
 *	sure we retrieve at least m[reqpage].  We try to load in as large
 *	a chunk surrounding m[reqpage] as is contiguous in swap and which
 *	belongs to the same object.
*
 *	The code is designed for asynchronous operation and
 *	immediate-notification of 'reqpage' but tends not to be
 *	used that way.  Please do not optimize-out this algorithmic
 *	feature, I intend to improve on it in the future.
 *
 *	The parent has a single vm_object_pip_add() reference prior to
 *	calling us and we should return with the same.
 *
 *	The parent has BUSY'd the pages.  We should return with 'm'
 *	left busy, but the others adjusted.
 */
static int
swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	struct buf *bp;
	vm_page_t mreq;
	int i;
	int j;
	daddr_t blk;

	mreq = m[reqpage];

	/*
	 * Calculate range to retrieve.  The pages have already been assigned
	 * their swapblks.  We require a *contiguous* range but we know it to
	 * not span devices.   If we do not supply it, bad things
	 * happen.  Note that blk, iblk & jblk can be SWAPBLK_NONE, but the
	 * loops are set up such that the case(s) are handled implicitly.
	 *
	 * The swp_*() calls must be made with the object locked.
	 */
	blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0);

	/* Extend the run backwards; on exit i is one below the first page. */
	for (i = reqpage - 1; i >= 0; --i) {
		daddr_t iblk;

		iblk = swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0);
		if (blk != iblk + (reqpage - i))
			break;
	}
	++i;

	/* Extend the run forwards; on exit j is one past the last page. */
	for (j = reqpage + 1; j < count; ++j) {
		daddr_t jblk;

		jblk = swp_pager_meta_ctl(m[j]->object, m[j]->pindex, 0);
		if (blk != jblk - (j - reqpage))
			break;
	}

	/*
	 * free pages outside our collection range.   Note: we never free
	 * mreq, it must remain busy throughout.
	 */
	if (0 < i || j < count) {
		int k;

		for (k = 0; k < i; ++k)
			swp_pager_free_nrpage(m[k]);
		for (k = j; k < count; ++k)
			swp_pager_free_nrpage(m[k]);
	}

	/*
	 * Return VM_PAGER_FAIL if we have nothing to do.  Return mreq
	 * still busy, but the others unbusied.
	 */
	if (blk == SWAPBLK_NONE)
		return (VM_PAGER_FAIL);

	/*
	 * Getpbuf() can sleep.
	 */
	VM_OBJECT_WUNLOCK(object);
	/*
	 * Get a swap buffer header to perform the IO
	 */
	bp = getpbuf(&nsw_rcount);
	bp->b_flags |= B_PAGING;

	bp->b_iocmd = BIO_READ;
	bp->b_iodone = swp_pager_async_iodone;
	bp->b_rcred = crhold(thread0.td_ucred);
	bp->b_wcred = crhold(thread0.td_ucred);
	/* The transfer covers pages m[i] .. m[j - 1]. */
	bp->b_blkno = blk - (reqpage - i);
	bp->b_bcount = PAGE_SIZE * (j - i);
	bp->b_bufsize = PAGE_SIZE * (j - i);
	/* Index of the requested page relative to the start of the buffer. */
	bp->b_pager.pg_reqpage = reqpage - i;

	VM_OBJECT_WLOCK(object);
	{
		int k;

		for (k = i; k < j; ++k) {
			bp->b_pages[k - i] = m[k];
			m[k]->oflags |= VPO_SWAPINPROG;
		}
	}
	bp->b_npages = j - i;

	PCPU_INC(cnt.v_swapin);
	PCPU_ADD(cnt.v_swappgsin, bp->b_npages);

	/*
	 * We still hold the lock on mreq, and our automatic completion routine
	 * does not remove it.
	 */
	vm_object_pip_add(object, bp->b_npages);
	VM_OBJECT_WUNLOCK(object);

	/*
	 * perform the I/O.  NOTE!!!  bp cannot be considered valid after
	 * this point because we automatically release it on completion.
	 * Instead, we look at the one page we are interested in which we
	 * still hold a lock on even through the I/O completion.
	 *
	 * The other pages in our m[] array are also released on completion,
	 * so we cannot assume they are valid anymore either.
	 *
	 * NOTE: b_blkno is destroyed by the call to swapdev_strategy
	 */
	BUF_KERNPROC(bp);
	swp_pager_strategy(bp);

	/*
	 * wait for the page we want to complete.  VPO_SWAPINPROG is always
	 * cleared on completion.  If an I/O error occurs, SWAPBLK_NONE
	 * is set in the meta-data.
	 */
	VM_OBJECT_WLOCK(object);
	while ((mreq->oflags & VPO_SWAPINPROG) != 0) {
		mreq->oflags |= VPO_SWAPSLEEP;
		PCPU_INC(cnt.v_intrans);
		/*
		 * NOTE(review): the diagnostic below dereferences bp even
		 * though, per the comment above, bp is no longer ours once
		 * the I/O has been issued.  The printed values may therefore
		 * be stale -- confirm whether this is acceptable.
		 */
		if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP,
		    "swread", hz * 20)) {
			printf(
"swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n",
			    bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
		}
	}

	/*
	 * mreq is left busied after completion, but all the other pages
	 * are freed.  If we had an unrecoverable read error the page will
	 * not be valid.
	 */
	if (mreq->valid != VM_PAGE_BITS_ALL) {
		return (VM_PAGER_ERROR);
	} else {
		return (VM_PAGER_OK);
	}

	/*
	 * A final note: in a low swap situation, we cannot deallocate swap
	 * and mark a page dirty here because the caller is likely to mark
	 * the page clean when we return, causing the page to possibly revert
	 * to all-zero's later.
	 */
}

/*
 * 	swap_pager_getpages_async():
 *
 *	Right now this is emulation of asynchronous operation on top of
 *	swap_pager_getpages().
 *
 *	The read is performed synchronously; its result is translated to an
 *	errno value (0, EIO or EINVAL) and handed to 'iodone' with the object
 *	unlocked.  The pager status code is returned to the caller.
 */
static int
swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count,
    int reqpage, pgo_getpages_iodone_t iodone, void *arg)
{
	int r, error;

	r = swap_pager_getpages(object, m, count, reqpage);
	/* The completion callback is invoked without the object lock. */
	VM_OBJECT_WUNLOCK(object);
	switch (r) {
	case VM_PAGER_OK:
		error = 0;
		break;
	case VM_PAGER_ERROR:
		error = EIO;
		break;
	case VM_PAGER_FAIL:
		error = EINVAL;
		break;
	default:
		panic("unhandled swap_pager_getpages() error %d", r);
	}
	(iodone)(arg, m, count, error);
	VM_OBJECT_WLOCK(object);

	return (r);
}

/*
 *	swap_pager_putpages:
 *
 *	Assign swap (if necessary) and initiate I/O on the specified pages.
 *
 *	We support both OBJT_DEFAULT and OBJT_SWAP objects.  DEFAULT objects
 *	are automatically converted to SWAP objects.
 *
 *	In a low memory situation we may block in VOP_STRATEGY(), but the new
 *	vm_page reservation system coupled with properly written VFS devices
 *	should ensure that no low-memory deadlock occurs.  This is an area
 *	which needs work.
 *
 *	The parent has N vm_object_pip_add() references prior to
 *	calling us and will remove references for rtvals[] that are
 *	not set to VM_PAGER_PEND.  We need to remove the rest on I/O
 *	completion.
 *
 *	The parent has soft-busy'd the pages it passes us and will unbusy
 *	those whos rtvals[] entry is not set to VM_PAGER_PEND on return.
 *	We need to unbusy the rest on I/O completion.
*/
static void
swap_pager_putpages(vm_object_t object, vm_page_t *m, int count,
    int flags, int *rtvals)
{
	int i, n;
	boolean_t sync;

	/* All pages must belong to the object we were handed. */
	if (count && m[0]->object != object) {
		panic("swap_pager_putpages: object mismatch %p/%p",
		    object,
		    m[0]->object
		);
	}

	/*
	 * Step 1
	 *
	 * Turn object into OBJT_SWAP
	 * check for bogus sysops
	 * force sync if not pageout process
	 */
	if (object->type != OBJT_SWAP)
		swp_pager_meta_build(object, 0, SWAPBLK_NONE);
	/* The object lock is dropped for the duration of the I/O setup. */
	VM_OBJECT_WUNLOCK(object);

	n = 0;
	if (curproc != pageproc)
		sync = TRUE;
	else
		sync = (flags & VM_PAGER_PUT_SYNC) != 0;

	/*
	 * Step 2
	 *
	 * Assign swap blocks and issue I/O.  We reallocate swap on the fly.
	 * The page is left dirty until the pageout operation completes
	 * successfully.
	 */
	for (i = 0; i < count; i += n) {
		int j;
		struct buf *bp;
		daddr_t blk;

		/*
		 * Maximum I/O size is limited by a number of factors.
		 */
		n = min(BLIST_MAX_ALLOC, count - i);
		n = min(n, nsw_cluster_max);

		/*
		 * Get biggest block of swap we can.  If we fail, fall
		 * back and try to allocate a smaller block.  Don't go
		 * overboard trying to allocate space if it would overly
		 * fragment swap.
		 */
		while (
		    (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE &&
		    n > 4
		) {
			n >>= 1;
		}
		if (blk == SWAPBLK_NONE) {
			/* No swap for this cluster; report failure and move on. */
			for (j = 0; j < n; ++j)
				rtvals[i+j] = VM_PAGER_FAIL;
			continue;
		}

		/*
		 * All I/O parameters have been satisfied, build the I/O
		 * request and assign the swap space.
		 */
		if (sync == TRUE) {
			bp = getpbuf(&nsw_wcount_sync);
		} else {
			bp = getpbuf(&nsw_wcount_async);
			bp->b_flags = B_ASYNC;
		}
		bp->b_flags |= B_PAGING;
		bp->b_iocmd = BIO_WRITE;

		bp->b_rcred = crhold(thread0.td_ucred);
		bp->b_wcred = crhold(thread0.td_ucred);
		bp->b_bcount = PAGE_SIZE * n;
		bp->b_bufsize = PAGE_SIZE * n;
		bp->b_blkno = blk;

		VM_OBJECT_WLOCK(object);
		for (j = 0; j < n; ++j) {
			vm_page_t mreq = m[i+j];

			/* Record the new block; any old assignment is freed. */
			swp_pager_meta_build(
			    mreq->object,
			    mreq->pindex,
			    blk + j
			);
			vm_page_dirty(mreq);
			mreq->oflags |= VPO_SWAPINPROG;
			bp->b_pages[j] = mreq;
		}
		VM_OBJECT_WUNLOCK(object);
		bp->b_npages = n;
		/*
		 * Must set dirty range for NFS to work.
		 */
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;

		PCPU_INC(cnt.v_swapout);
		PCPU_ADD(cnt.v_swappgsout, bp->b_npages);

		/*
		 * We unconditionally set rtvals[] to VM_PAGER_PEND so that we
		 * can call the async completion routine at the end of a
		 * synchronous I/O operation.  Otherwise, our caller would
		 * perform duplicate unbusy and wakeup operations on the page
		 * and object, respectively.
		 */
		for (j = 0; j < n; j++)
			rtvals[i + j] = VM_PAGER_PEND;

		/*
		 * asynchronous
		 *
		 * NOTE: b_blkno is destroyed by the call to swapdev_strategy
		 */
		if (sync == FALSE) {
			bp->b_iodone = swp_pager_async_iodone;
			BUF_KERNPROC(bp);
			swp_pager_strategy(bp);
			continue;
		}

		/*
		 * synchronous
		 *
		 * NOTE: b_blkno is destroyed by the call to swapdev_strategy
		 */
		bp->b_iodone = bdone;
		swp_pager_strategy(bp);

		/*
		 * Wait for the sync I/O to complete.
		 */
		bwait(bp, PVM, "swwrt");

		/*
		 * Now that we are through with the bp, we can call the
		 * normal async completion, which frees everything up.
		 */
		swp_pager_async_iodone(bp);
	}
	/* Return to the caller with the object locked, as on entry. */
	VM_OBJECT_WLOCK(object);
}

/*
 *	swp_pager_async_iodone:
 *
 *	Completion routine for asynchronous reads and writes from/to swap.
 *	Also called manually by synchronous code to finish up a bp.
 *
 *	This routine may not sleep.
*/
static void
swp_pager_async_iodone(struct buf *bp)
{
	int i;
	vm_object_t object = NULL;

	/*
	 * report error
	 *
	 * NOTE(review): the two adjacent string literals concatenate to
	 * "blkno %ld,size %ld" with no space after the comma.
	 */
	if (bp->b_ioflags & BIO_ERROR) {
		printf(
		    "swap_pager: I/O error - %s failed; blkno %ld,"
			"size %ld, error %d\n",
		    ((bp->b_iocmd == BIO_READ) ? "pagein" : "pageout"),
		    (long)bp->b_blkno,
		    (long)bp->b_bcount,
		    bp->b_error
		);
	}

	/*
	 * remove the mapping for kernel virtual
	 */
	if (buf_mapped(bp))
		pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages);
	else
		bp->b_data = bp->b_kvabase;

	/* All pages in the buffer are taken to share the first page's object. */
	if (bp->b_npages) {
		object = bp->b_pages[0]->object;
		VM_OBJECT_WLOCK(object);
	}

	/*
	 * cleanup pages.  If an error occurs writing to swap, we are in
	 * very serious trouble.  If it happens to be a disk error, though,
	 * we may be able to recover by reassigning the swap later on.  So
	 * in this case we remove the m->swapblk assignment for the page
	 * but do not free it in the rlist.  The errornous block(s) are thus
	 * never reallocated as swap.  Redirty the page and continue.
	 */
	for (i = 0; i < bp->b_npages; ++i) {
		vm_page_t m = bp->b_pages[i];

		m->oflags &= ~VPO_SWAPINPROG;
		/* Wake a getpages caller sleeping on this page, if any. */
		if (m->oflags & VPO_SWAPSLEEP) {
			m->oflags &= ~VPO_SWAPSLEEP;
			wakeup(&object->paging_in_progress);
		}

		if (bp->b_ioflags & BIO_ERROR) {
			/*
			 * If an error occurs I'd love to throw the swapblk
			 * away without freeing it back to swapspace, so it
			 * can never be used again.  But I can't from an
			 * interrupt.
			 */
			if (bp->b_iocmd == BIO_READ) {
				/*
				 * When reading, reqpage needs to stay
				 * locked for the parent, but all other
				 * pages can be freed.  We still want to
				 * wakeup the parent waiting on the page,
				 * though.  ( also: pg_reqpage can be -1 and
				 * not match anything ).
				 *
				 * We have to wake specifically requested pages
				 * up too because we cleared VPO_SWAPINPROG and
				 * someone may be waiting for that.
				 *
				 * NOTE: for reads, m->dirty will probably
				 * be overridden by the original caller of
				 * getpages so don't play cute tricks here.
				 */
				m->valid = 0;
				if (i != bp->b_pager.pg_reqpage)
					swp_pager_free_nrpage(m);
				else {
					vm_page_lock(m);
					vm_page_flash(m);
					vm_page_unlock(m);
				}
				/*
				 * If i == bp->b_pager.pg_reqpage, do not wake
				 * the page up.  The caller needs to.
				 */
			} else {
				/*
				 * If a write error occurs, reactivate page
				 * so it doesn't clog the inactive list,
				 * then finish the I/O.
				 */
				vm_page_dirty(m);
				vm_page_lock(m);
				vm_page_activate(m);
				vm_page_unlock(m);
				vm_page_sunbusy(m);
			}
		} else if (bp->b_iocmd == BIO_READ) {
			/*
			 * NOTE: for reads, m->dirty will probably be
			 * overridden by the original caller of getpages so
			 * we cannot set them in order to free the underlying
			 * swap in a low-swap situation.  I don't think we'd
			 * want to do that anyway, but it was an optimization
			 * that existed in the old swapper for a time before
			 * it got ripped out due to precisely this problem.
			 *
			 * If not the requested page then deactivate it.
			 *
			 * Note that the requested page, reqpage, is left
			 * busied, but we still have to wake it up.  The
			 * other pages are released (unbusied) by
			 * vm_page_xunbusy().
			 */
			KASSERT(!pmap_page_is_mapped(m),
			    ("swp_pager_async_iodone: page %p is mapped", m));
			m->valid = VM_PAGE_BITS_ALL;
			KASSERT(m->dirty == 0,
			    ("swp_pager_async_iodone: page %p is dirty", m));

			/*
			 * We have to wake specifically requested pages
			 * up too because we cleared VPO_SWAPINPROG and
			 * could be waiting for it in getpages.  However,
			 * be sure to not unbusy getpages specifically
			 * requested page - getpages expects it to be
			 * left busy.
			 */
			if (i != bp->b_pager.pg_reqpage) {
				vm_page_lock(m);
				vm_page_deactivate(m);
				vm_page_unlock(m);
				vm_page_xunbusy(m);
			} else {
				vm_page_lock(m);
				vm_page_flash(m);
				vm_page_unlock(m);
			}
		} else {
			/*
			 * For write success, clear the dirty
			 * status, then finish the I/O ( which decrements the
			 * busy count and possibly wakes waiter's up ).
			 */
			KASSERT(!pmap_page_is_write_mapped(m),
			    ("swp_pager_async_iodone: page %p is not write"
			    " protected", m));
			vm_page_undirty(m);
			vm_page_sunbusy(m);
			if (vm_page_count_severe()) {
				vm_page_lock(m);
				vm_page_try_to_cache(m);
				vm_page_unlock(m);
			}
		}
	}

	/*
	 * adjust pip.  NOTE: the original parent may still have its own
	 * pip refs on the object.
	 */
	if (object != NULL) {
		vm_object_pip_wakeupn(object, bp->b_npages);
		VM_OBJECT_WUNLOCK(object);
	}

	/*
	 * swapdev_strategy() manually sets b_vp and b_bufobj before calling
	 * bstrategy(). Set them back to NULL now we're done with it, or we'll
	 * trigger a KASSERT in relpbuf().
	 */
	if (bp->b_vp) {
		bp->b_vp = NULL;
		bp->b_bufobj = NULL;
	}
	/*
	 * release the physical I/O buffer
	 *
	 * The counter passed here is the one the buffer was taken from:
	 * reads use nsw_rcount, writes use the async or sync write counter
	 * depending on B_ASYNC.
	 */
	relpbuf(
	    bp,
	    ((bp->b_iocmd == BIO_READ) ? &nsw_rcount :
		((bp->b_flags & B_ASYNC) ?
		    &nsw_wcount_async :
		    &nsw_wcount_sync
		)
	    )
	);
}

/*
 *	swap_pager_isswapped:
 *
 *	Return 1 if at least one page in the given object is paged
 *	out to the given swap device.
 *
 *	This routine may not sleep.
 */
int
swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
{
	daddr_t index = 0;
	int bcount;
	int i;

	VM_OBJECT_ASSERT_WLOCKED(object);
	if (object->type != OBJT_SWAP)
		return (0);

	mtx_lock(&swhash_mtx);
	/*
	 * Probe one swblock-sized index range per iteration, for as many
	 * iterations as the object has swblocks.
	 */
	for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) {
		struct swblock *swap;

		if ((swap = *swp_pager_hash(object, index)) != NULL) {
			for (i = 0; i < SWAP_META_PAGES; ++i) {
				if (swp_pager_isondev(swap->swb_pages[i], sp)) {
					mtx_unlock(&swhash_mtx);
					return (1);
				}
			}
		}
		index += SWAP_META_PAGES;
	}
	mtx_unlock(&swhash_mtx);
	return (0);
}

/*
 * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
 *
 *	This routine dissociates the page at the given index within a
 *	swap block from its backing store, paging it in if necessary.
 *	If the page is paged in, it is placed in the inactive queue,
 *	since it had its backing store ripped out from under it.
* We also attempt to swap in all other pages in the swap block, * we only guarantee that the one at the specified index is * paged in. * * XXX - The code to page the whole block in doesn't work, so we * revert to the one-by-one behavior for now. Sigh. */ static inline void swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; vm_object_pip_add(object, 1); m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_wakeup(object); vm_page_dirty(m); vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); vm_page_xunbusy(m); vm_pager_page_unswapped(m); return; } if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_wakeup(object); vm_page_dirty(m); vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); vm_page_xunbusy(m); vm_pager_page_unswapped(m); } /* * swap_pager_swapoff: * * Page in all of the pages that have been paged out to the * given device. The corresponding blocks in the bitmap must be * marked as allocated and the device must be flagged SW_CLOSING. * There may be no processes swapped out to the device. * * This routine may block. 
*/
static void
swap_pager_swapoff(struct swdevt *sp)
{
	struct swblock *swap;
	int i, j, retries;

	GIANT_REQUIRED;

	retries = 0;
full_rescan:
	mtx_lock(&swhash_mtx);
	for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
restart:
		for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) {
			vm_object_t object = swap->swb_object;
			vm_pindex_t pindex = swap->swb_index;
			for (j = 0; j < SWAP_META_PAGES; ++j) {
				if (swp_pager_isondev(swap->swb_pages[j], sp)) {
					/* avoid deadlock */
					if (!VM_OBJECT_TRYWLOCK(object)) {
						/*
						 * Skip the rest of this
						 * swblock; a later full
						 * rescan picks it up.
						 */
						break;
					} else {
						/*
						 * The hash lock is dropped
						 * around the page-in, so the
						 * chain may have changed:
						 * rescan this bucket from
						 * its head.
						 */
						mtx_unlock(&swhash_mtx);
						swp_pager_force_pagein(object,
						    pindex + j);
						VM_OBJECT_WUNLOCK(object);
						mtx_lock(&swhash_mtx);
						goto restart;
					}
				}
			}
		}
	}
	mtx_unlock(&swhash_mtx);
	if (sp->sw_used) {
		/*
		 * Objects may be locked or paging to the device being
		 * removed, so we will miss their pages and need to
		 * make another pass.  We have marked this device as
		 * SW_CLOSING, so the activity should finish soon.
		 */
		retries++;
		if (retries > 100) {
			panic("swapoff: failed to locate %d swap blocks",
			    sp->sw_used);
		}
		pause("swpoff", hz / 20);
		goto full_rescan;
	}
}

/************************************************************************
 *				SWAP META DATA 				*
 ************************************************************************
 *
 *	These routines manipulate the swap metadata stored in the
 *	OBJT_SWAP object.
 *
 *	Swap metadata is implemented with a global hash and not directly
 *	linked into the object.  Instead the object simply contains
 *	appropriate tracking counters.
 */

/*
 * SWP_PAGER_META_BUILD() -	add swap block to swap meta data for object
 *
 *	We first convert the object to a swap object if it is a default
 *	object.
 *
 *	The specified swapblk is added to the object's swap metadata.  If
 *	the swapblk is not valid, it is freed instead.  Any previously
 *	assigned swapblk is freed.
*/
static void
swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
{
	/* Set while the "zone exhausted" warning is outstanding. */
	static volatile int exhausted;
	struct swblock *swap;
	struct swblock **pswap;
	int idx;

	VM_OBJECT_ASSERT_WLOCKED(object);
	/*
	 * Convert default object to swap object if necessary
	 */
	if (object->type != OBJT_SWAP) {
		object->type = OBJT_SWAP;
		object->un_pager.swp.swp_bcount = 0;

		/* Named objects also go on the handle lookup list. */
		if (object->handle != NULL) {
			mtx_lock(&sw_alloc_mtx);
			TAILQ_INSERT_TAIL(
			    NOBJLIST(object->handle),
			    object,
			    pager_object_list
			);
			mtx_unlock(&sw_alloc_mtx);
		}
	}

	/*
	 * Locate hash entry.  If not found create, but if we aren't adding
	 * anything just return.  If we run out of space in the map we wait
	 * and, since the hash table may have changed, retry.
	 */
retry:
	mtx_lock(&swhash_mtx);
	pswap = swp_pager_hash(object, pindex);

	if ((swap = *pswap) == NULL) {
		int i;

		if (swapblk == SWAPBLK_NONE)
			goto done;

		swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT |
		    (curproc == pageproc ? M_USE_RESERVE : 0));
		if (swap == NULL) {
			/*
			 * Both locks are dropped before waiting and the
			 * lookup is redone from scratch afterwards.
			 */
			mtx_unlock(&swhash_mtx);
			VM_OBJECT_WUNLOCK(object);
			if (uma_zone_exhausted(swap_zone)) {
				/* Print the warning once per exhaustion. */
				if (atomic_cmpset_int(&exhausted, 0, 1))
					printf("swap zone exhausted, "
					    "increase kern.maxswzone\n");
				vm_pageout_oom(VM_OOM_SWAPZ);
				pause("swzonex", 10);
			} else
				VM_WAIT;
			VM_OBJECT_WLOCK(object);
			goto retry;
		}

		/* Allocation works again: clear the warning latch. */
		if (atomic_cmpset_int(&exhausted, 1, 0))
			printf("swap zone ok\n");

		swap->swb_hnext = NULL;
		swap->swb_object = object;
		swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK;
		swap->swb_count = 0;

		++object->un_pager.swp.swp_bcount;

		for (i = 0; i < SWAP_META_PAGES; ++i)
			swap->swb_pages[i] = SWAPBLK_NONE;
	}

	/*
	 * Delete prior contents of metadata
	 */
	idx = pindex & SWAP_META_MASK;

	if (swap->swb_pages[idx] != SWAPBLK_NONE) {
		swp_pager_freeswapspace(swap->swb_pages[idx], 1);
		--swap->swb_count;
	}

	/*
	 * Enter block into metadata
	 */
	swap->swb_pages[idx] = swapblk;
	if (swapblk != SWAPBLK_NONE)
		++swap->swb_count;
done:
	mtx_unlock(&swhash_mtx);
}

/*
 * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap
metadata
 *
 *	The requested range of blocks is freed, with any associated swap
 *	returned to the swap bitmap.
 *
 *	This routine will free swap metadata structures as they are cleaned
 *	out.  This routine does *NOT* operate on swap metadata associated
 *	with resident pages.
 */
static void
swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
{
	struct swblock **link;
	struct swblock *meta;
	daddr_t old;
	int slot, skip;

	VM_OBJECT_ASSERT_LOCKED(object);
	if (object->type != OBJT_SWAP)
		return;

	while (count > 0) {
		mtx_lock(&swhash_mtx);
		link = swp_pager_hash(object, index);
		meta = *link;
		if (meta == NULL) {
			/*
			 * No swblock covers this index: jump straight to
			 * the start of the next swblock-sized range.
			 */
			skip = SWAP_META_PAGES - (index & SWAP_META_MASK);
			count -= skip;
			index += skip;
			mtx_unlock(&swhash_mtx);
			continue;
		}

		slot = index & SWAP_META_MASK;
		old = meta->swb_pages[slot];
		if (old != SWAPBLK_NONE) {
			swp_pager_freeswapspace(old, 1);
			meta->swb_pages[slot] = SWAPBLK_NONE;
			meta->swb_count -= 1;
			if (meta->swb_count == 0) {
				/* Last entry gone: unlink and free it. */
				*link = meta->swb_hnext;
				uma_zfree(swap_zone, meta);
				--object->un_pager.swp.swp_bcount;
			}
		}
		--count;
		++index;
		mtx_unlock(&swhash_mtx);
	}
}

/*
 * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object
 *
 *	Walks the object's index space one swblock at a time, returning
 *	every assigned swap block to the bitmap and freeing each swblock,
 *	until the object's swblock count reaches zero.
 */
static void
swp_pager_meta_free_all(vm_object_t object)
{
	struct swblock **link;
	struct swblock *meta;
	daddr_t index;
	daddr_t old;
	int slot;

	VM_OBJECT_ASSERT_WLOCKED(object);
	if (object->type != OBJT_SWAP)
		return;

	for (index = 0; object->un_pager.swp.swp_bcount != 0;
	    index += SWAP_META_PAGES) {
		mtx_lock(&swhash_mtx);
		link = swp_pager_hash(object, index);
		meta = *link;
		if (meta != NULL) {
			for (slot = 0; slot < SWAP_META_PAGES; ++slot) {
				old = meta->swb_pages[slot];
				if (old == SWAPBLK_NONE)
					continue;
				--meta->swb_count;
				swp_pager_freeswapspace(old, 1);
			}
			/* The count must agree with the slots just freed. */
			if (meta->swb_count != 0)
				panic("swap_pager_meta_free_all: swb_count != 0");
			*link = meta->swb_hnext;
			uma_zfree(swap_zone, meta);
			--object->un_pager.swp.swp_bcount;
		}
		mtx_unlock(&swhash_mtx);
	}
}

/*
 * SWP_PAGER_METACTL() -  misc control of swap and vm_page_t meta data.
*
 *	This routine is capable of looking up, popping, or freeing
 *	swapblk assignments in the swap meta data or in the vm_page_t.
 *	The routine typically returns the swapblk being looked-up, or popped,
 *	or SWAPBLK_NONE if the block was freed, or SWAPBLK_NONE if the block
 *	was invalid.  This routine will automatically free any invalid
 *	meta-data swapblks.
 *
 *	It is not possible to store invalid swapblks in the swap meta data
 *	(other than a literal 'SWAPBLK_NONE'), so we don't bother checking.
 *
 *	When acting on a busy resident page and paging is in progress, we
 *	have to wait until paging is complete but otherwise can act on the
 *	busy page.
 *
 *	SWM_FREE	remove and free swap block from metadata
 *	SWM_POP		remove from meta data but do not free.. pop it out
 *
 *	With neither flag set the call is a pure lookup: the stored block
 *	is returned and the metadata is left unchanged.
 */
static daddr_t
swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags)
{
	struct swblock **pswap;
	struct swblock *swap;
	daddr_t r1;
	int idx;

	VM_OBJECT_ASSERT_LOCKED(object);
	/*
	 * The meta data only exists if the object is OBJT_SWAP
	 * and even then might not be allocated yet.
	 */
	if (object->type != OBJT_SWAP)
		return (SWAPBLK_NONE);

	r1 = SWAPBLK_NONE;
	mtx_lock(&swhash_mtx);
	pswap = swp_pager_hash(object, pindex);

	if ((swap = *pswap) != NULL) {
		idx = pindex & SWAP_META_MASK;
		r1 = swap->swb_pages[idx];

		if (r1 != SWAPBLK_NONE) {
			/* SWM_FREE releases the block and reports NONE. */
			if (flags & SWM_FREE) {
				swp_pager_freeswapspace(r1, 1);
				r1 = SWAPBLK_NONE;
			}
			/* Both FREE and POP clear the slot in the swblock. */
			if (flags & (SWM_FREE|SWM_POP)) {
				swap->swb_pages[idx] = SWAPBLK_NONE;
				if (--swap->swb_count == 0) {
					*pswap = swap->swb_hnext;
					uma_zfree(swap_zone, swap);
					--object->un_pager.swp.swp_bcount;
				}
			}
		}
	}
	mtx_unlock(&swhash_mtx);
	return (r1);
}

/*
 * System call swapon(name) enables swapping on device name,
 * which must be in the swdevsw.  Return EBUSY
 * if already swapping on this device.
*/
#ifndef _SYS_SYSPROTO_H_
/* Argument structure for swapon(2); 'name' is a userspace path pointer. */
struct swapon_args {
	char *name;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
/*
 * Requires PRIV_SWAPON.  Calls are serialised with other swapon/swapoff
 * callers through swdev_syscall_active, which is manipulated under Giant.
 * A disk vnode is handed to swapongeom(); a regular file on a filesystem
 * flagged VFCF_NETWORK is handed to swaponvp() with its size expressed in
 * DEV_BSIZE units.  Returns 0 or an errno value; on error the vnode
 * reference obtained from namei() is released here.
 */
int
sys_swapon(struct thread *td, struct swapon_args *uap)
{
	struct vattr attr;
	struct vnode *vp;
	struct nameidata nd;
	int error;

	error = priv_check(td, PRIV_SWAPON);
	if (error)
		return (error);

	mtx_lock(&Giant);
	/* Wait until no other swapon/swapoff call is in progress. */
	while (swdev_syscall_active)
	    tsleep(&swdev_syscall_active, PUSER - 1, "swpon", 0);
	swdev_syscall_active = 1;

	/*
	 * Swap metadata may not fit in the KVM if we have physical
	 * memory of >1GB.
	 */
	if (swap_zone == NULL) {
		error = ENOMEM;
		goto done;
	}

	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
	    uap->name, td);
	error = namei(&nd);
	if (error)
		goto done;

	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * vn_isdisk() stores its reason in 'error' when vp is not a disk;
	 * that value is what gets returned if the NFS branch is not taken.
	 */
	if (vn_isdisk(vp, &error)) {
		error = swapongeom(td, vp);
	} else if (vp->v_type == VREG &&
	    (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 &&
	    (error = VOP_GETATTR(vp, &attr, td->td_ucred)) == 0) {
		/*
		 * Allow direct swapping to NFS regular files in the same
		 * way that nfs_mountroot() sets up diskless swapping.
		 */
		error = swaponvp(td, vp, attr.va_size / DEV_BSIZE);
	}

	if (error)
		vrele(vp);
done:
	swdev_syscall_active = 0;
	wakeup_one(&swdev_syscall_active);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Check that the total amount of swap currently configured does not
 * exceed half the theoretical maximum.  If it does, print a warning
 * message and return -1; otherwise, return 0.
*/ static int swapon_check_swzone(unsigned long npages) { unsigned long maxpages; /* absolute maximum we can handle assuming 100% efficiency */ maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES; /* recommend using no more than half that amount */ if (npages > maxpages / 2) { printf("warning: total configured swap (%lu pages) " "exceeds maximum recommended amount (%lu pages).\n", npages, maxpages / 2); printf("warning: increase kern.maxswzone " "or reduce amount of swap.\n"); return (-1); } return (0); } static void swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { struct swdevt *sp, *tsp; swblk_t dvbase; u_long mblocks; /* * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. * First chop nblks off to page-align it, then convert. * * sw->sw_nblks is in page-sized chunks now too. */ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); /* * If we go beyond this, we get overflows in the radix * tree bitmap code. 
*/ mblocks = 0x40000000 / BLIST_META_RADIX; if (nblks > mblocks) { printf( "WARNING: reducing swap size to maximum of %luMB per unit\n", mblocks / 1024 / 1024 * PAGE_SIZE); nblks = mblocks; } sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); sp->sw_vp = vp; sp->sw_id = id; sp->sw_dev = dev; sp->sw_flags = 0; sp->sw_nblks = nblks; sp->sw_used = 0; sp->sw_strategy = strategy; sp->sw_close = close; sp->sw_flags = flags; sp->sw_blist = blist_create(nblks, M_WAITOK); /* * Do not free the first two block in order to avoid overwriting * any bsd label at the front of the partition */ blist_free(sp->sw_blist, 2, nblks - 2); dvbase = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(tsp, &swtailq, sw_list) { if (tsp->sw_end >= dvbase) { /* * We put one uncovered page between the devices * in order to definitively prevent any cross-device * I/O requests */ dvbase = tsp->sw_end + 1; } } sp->sw_first = dvbase; sp->sw_end = dvbase + nblks; TAILQ_INSERT_TAIL(&swtailq, sp, sw_list); nswapdev++; swap_pager_avail += nblks; swap_total += (vm_ooffset_t)nblks * PAGE_SIZE; swapon_check_swzone(swap_total / PAGE_SIZE); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); } /* * SYSCALL: swapoff(devname) * * Disable swapping on the given device. * * XXX: Badly designed system call: it should use a device index * rather than filename as specification. We keep sw_vp around * only to make this work. 
*/
#ifndef _SYS_SYSPROTO_H_
/* Argument structure for swapoff(2); 'name' is a userspace path pointer. */
struct swapoff_args {
	char *name;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
/*
 * Requires PRIV_SWAPOFF.  Serialised against other swapon/swapoff calls
 * through swdev_syscall_active under Giant.  The path is resolved to a
 * vnode, the swap device whose sw_vp matches is looked up, and
 * swapoff_one() does the work.  Returns EINVAL when no device matches.
 *
 * NOTE(review): no release of the vnode reference returned in nd.ni_vp is
 * visible in this function -- confirm whether that is intentional.
 */
int
sys_swapoff(struct thread *td, struct swapoff_args *uap)
{
	struct vnode *vp;
	struct nameidata nd;
	struct swdevt *sp;
	int error;

	error = priv_check(td, PRIV_SWAPOFF);
	if (error)
		return (error);

	mtx_lock(&Giant);
	/* Wait until no other swapon/swapoff call is in progress. */
	while (swdev_syscall_active)
	    tsleep(&swdev_syscall_active, PUSER - 1, "swpoff", 0);
	swdev_syscall_active = 1;

	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name,
	    td);
	error = namei(&nd);
	if (error)
		goto done;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/* Find the swap device backed by this vnode; sp is NULL if none. */
	mtx_lock(&sw_dev_mtx);
	TAILQ_FOREACH(sp, &swtailq, sw_list) {
		if (sp->sw_vp == vp)
			break;
	}
	mtx_unlock(&sw_dev_mtx);
	if (sp == NULL) {
		error = EINVAL;
		goto done;
	}
	error = swapoff_one(sp, td->td_ucred);
done:
	swdev_syscall_active = 0;
	wakeup_one(&swdev_syscall_active);
	mtx_unlock(&Giant);
	return (error);
}

/*
 * Take one swap device out of service.  Giant must be held (asserted).
 * Returns 0 on success, ENOMEM when the remaining memory plus swap cannot
 * absorb the device's contents, or the MAC check's error when MAC is
 * configured.
 */
static int
swapoff_one(struct swdevt *sp, struct ucred *cred)
{
	u_long nblks, dvbase;
#ifdef MAC
	int error;
#endif

	mtx_assert(&Giant, MA_OWNED);
#ifdef MAC
	(void) vn_lock(sp->sw_vp, LK_EXCLUSIVE | LK_RETRY);
	error = mac_system_check_swapoff(cred, sp->sw_vp);
	(void) VOP_UNLOCK(sp->sw_vp, 0);
	if (error != 0)
		return (error);
#endif
	nblks = sp->sw_nblks;

	/*
	 * We can turn off this swap device safely only if the
	 * available virtual memory in the system will fit the amount
	 * of data we will have to page back in, plus an epsilon so
	 * the system doesn't become critically low on swap space.
	 */
	if (vm_cnt.v_free_count + vm_cnt.v_cache_count + swap_pager_avail <
	    nblks + nswap_lowat) {
		return (ENOMEM);
	}

	/*
	 * Prevent further allocations on this device.
	 */
	mtx_lock(&sw_dev_mtx);
	sp->sw_flags |= SW_CLOSING;
	/*
	 * Fill the block list in dmmax-sized steps; blist_fill()'s return
	 * value is subtracted from the available-swap count.
	 */
	for (dvbase = 0; dvbase < sp->sw_end; dvbase += dmmax) {
		swap_pager_avail -= blist_fill(sp->sw_blist,
		     dvbase, dmmax);
	}
	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
	mtx_unlock(&sw_dev_mtx);

	/*
	 * Page in the contents of the device and close it.
*/ swap_pager_swapoff(sp); sp->sw_close(curthread, sp); mtx_lock(&sw_dev_mtx); sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; if (nswapdev == 0) { swap_pager_full = 2; swap_pager_almost_full = 1; } if (swdevhd == sp) swdevhd = NULL; mtx_unlock(&sw_dev_mtx); blist_destroy(sp->sw_blist); free(sp, M_VMPGDATA); return (0); } void swapoff_all(void) { struct swdevt *sp, *spt; const char *devname; int error; mtx_lock(&Giant); while (swdev_syscall_active) tsleep(&swdev_syscall_active, PUSER - 1, "swpoff", 0); swdev_syscall_active = 1; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH_SAFE(sp, &swtailq, sw_list, spt) { mtx_unlock(&sw_dev_mtx); if (vn_isdisk(sp->sw_vp, NULL)) devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; error = swapoff_one(sp, thread0.td_ucred); if (error != 0) { printf("Cannot remove swap device %s (error=%d), " "skipping.\n", devname, error); } else if (bootverbose) { printf("Swap device %s removed.\n", devname); } mtx_lock(&sw_dev_mtx); } mtx_unlock(&sw_dev_mtx); swdev_syscall_active = 0; wakeup_one(&swdev_syscall_active); mtx_unlock(&Giant); } void swap_pager_status(int *total, int *used) { struct swdevt *sp; *total = 0; *used = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { *total += sp->sw_nblks; *used += sp->sw_used; } mtx_unlock(&sw_dev_mtx); } int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len) { struct swdevt *sp; const char *tmp_devname; int error, n; n = 0; error = ENOENT; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (n != name) { n++; continue; } xs->xsw_version = XSWDEV_VERSION; xs->xsw_dev = sp->sw_dev; xs->xsw_flags = sp->sw_flags; xs->xsw_nblks = sp->sw_nblks; xs->xsw_used = sp->sw_used; if (devname != NULL) { if (vn_isdisk(sp->sw_vp, NULL)) tmp_devname = devtoname(sp->sw_vp->v_rdev); else tmp_devname = "[file]"; strncpy(devname, tmp_devname, len); } error = 0; break; } mtx_unlock(&sw_dev_mtx); return (error); } static int 
sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) { struct xswdev xs; int error; if (arg2 != 1) /* name length */ return (EINVAL); error = swap_dev_info(*(int *)arg1, &xs, NULL, 0); if (error != 0) return (error); error = SYSCTL_OUT(req, &xs, sizeof(xs)); return (error); } SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswapdev, 0, "Number of swap devices"); SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD, sysctl_vm_swap_info, "Swap statistics by device"); /* * vmspace_swap_count() - count the approximate swap usage in pages for a * vmspace. * * The map must be locked. * * Swap usage is determined by taking the proportional swap used by * VM objects backing the VM map. To make up for fractional losses, * if the VM object has any swap use at all the associated map entries * count for at least 1 swap page. */ long vmspace_swap_count(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t cur; vm_object_t object; long count, n; map = &vmspace->vm_map; count = 0; for (cur = map->header.next; cur != &map->header; cur = cur->next) { if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && (object = cur->object.vm_object) != NULL) { VM_OBJECT_WLOCK(object); if (object->type == OBJT_SWAP && object->un_pager.swp.swp_bcount != 0) { n = (cur->end - cur->start) / PAGE_SIZE; count += object->un_pager.swp.swp_bcount * SWAP_META_PAGES * n / object->size + 1; } VM_OBJECT_WUNLOCK(object); } } return (count); } /* * GEOM backend * * Swapping onto disk devices. * */ static g_orphan_t swapgeom_orphan; static struct g_class g_swap_class = { .name = "SWAP", .version = G_VERSION, .orphan = swapgeom_orphan, }; DECLARE_GEOM_CLASS(g_swap_class, g_class); static void swapgeom_close_ev(void *arg, int flags) { struct g_consumer *cp; cp = arg; g_access(cp, -1, -1, 0); g_detach(cp); g_destroy_consumer(cp); } /* * Add a reference to the g_consumer for an inflight transaction. 
*/
/* The reference count lives in cp->index; sw_dev_mtx must be held. */
static void
swapgeom_acquire(struct g_consumer *cp)
{

	mtx_assert(&sw_dev_mtx, MA_OWNED);
	cp->index++;
}

/*
 * Remove a reference from the g_consumer. Post a close event if
 * all references go away.
 */
static void
swapgeom_release(struct g_consumer *cp, struct swdevt *sp)
{

	mtx_assert(&sw_dev_mtx, MA_OWNED);
	cp->index--;
	if (cp->index == 0) {
		/* sw_id is cleared only if the event was queued (returned 0). */
		if (g_post_event(swapgeom_close_ev, cp, M_NOWAIT, NULL) == 0)
			sp->sw_id = NULL;
	}
}

/*
 * bio completion callback: copy the bio's status back into the
 * originating buf, complete the buf, then drop the consumer reference
 * taken in swapgeom_strategy() and destroy the bio.
 */
static void
swapgeom_done(struct bio *bp2)
{
	struct swdevt *sp;
	struct buf *bp;
	struct g_consumer *cp;

	/* bio_caller2 and bio_caller1 were set by swapgeom_strategy(). */
	bp = bp2->bio_caller2;
	cp = bp2->bio_from;
	bp->b_ioflags = bp2->bio_flags;
	if (bp2->bio_error)
		bp->b_ioflags |= BIO_ERROR;
	bp->b_resid = bp->b_bcount - bp2->bio_completed;
	bp->b_error = bp2->bio_error;
	bufdone(bp);
	sp = bp2->bio_caller1;
	mtx_lock(&sw_dev_mtx);
	swapgeom_release(cp, sp);
	mtx_unlock(&sw_dev_mtx);
	g_destroy_bio(bp2);
}

/*
 * Issue the swap I/O described by 'bp' to the GEOM consumer stored in
 * sp->sw_id.  The buf is completed with ENXIO if the device has no
 * consumer, or with ENOMEM if no bio could be allocated.
 */
static void
swapgeom_strategy(struct buf *bp, struct swdevt *sp)
{
	struct bio *bio;
	struct g_consumer *cp;

	mtx_lock(&sw_dev_mtx);
	cp = sp->sw_id;
	if (cp == NULL) {
		mtx_unlock(&sw_dev_mtx);
		bp->b_error = ENXIO;
		bp->b_ioflags |= BIO_ERROR;
		bufdone(bp);
		return;
	}
	/* Hold a reference for the lifetime of this request. */
	swapgeom_acquire(cp);
	mtx_unlock(&sw_dev_mtx);
	/* Writes use g_new_bio(); everything else uses g_alloc_bio(). */
	if (bp->b_iocmd == BIO_WRITE)
		bio = g_new_bio();
	else
		bio = g_alloc_bio();
	if (bio == NULL) {
		mtx_lock(&sw_dev_mtx);
		swapgeom_release(cp, sp);
		mtx_unlock(&sw_dev_mtx);
		bp->b_error = ENOMEM;
		bp->b_ioflags |= BIO_ERROR;
		bufdone(bp);
		return;
	}

	bio->bio_caller1 = sp;
	bio->bio_caller2 = bp;
	bio->bio_cmd = bp->b_iocmd;
	/* Translate the swap block number into a byte offset on the device. */
	bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE;
	bio->bio_length = bp->b_bcount;
	bio->bio_done = swapgeom_done;
	if (!buf_mapped(bp)) {
		/* Unmapped buf: pass the page array instead of a KVA pointer. */
		bio->bio_ma = bp->b_pages;
		bio->bio_data = unmapped_buf;
		bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK;
		bio->bio_ma_n = bp->b_npages;
		bio->bio_flags |= BIO_UNMAPPED;
	} else {
		bio->bio_data = bp->b_data;
		bio->bio_ma = NULL;
	}
	g_io_request(bio, cp);
	return;
}

/* Orphan callback registered in g_swap_class. */
static void
swapgeom_orphan(struct g_consumer *cp)
{
	struct swdevt *sp;
	int destroy;
/* Mark the swap device that uses this consumer, if any, as closing. */
	mtx_lock(&sw_dev_mtx);
	TAILQ_FOREACH(sp, &swtailq, sw_list) {
		if (sp->sw_id == cp) {
			sp->sw_flags |= SW_CLOSING;
			break;
		}
	}
	/*
	 * Drop reference we were created with. Do directly since we're in a
	 * special context where we don't have to queue the call to
	 * swapgeom_close_ev().
	 */
	cp->index--;
	/* sp is NULL here when no device on the list matched cp. */
	destroy = ((sp != NULL) && (cp->index == 0));
	if (destroy)
		sp->sw_id = NULL;
	mtx_unlock(&sw_dev_mtx);
	if (destroy)
		swapgeom_close_ev(cp, 0);
}

/*
 * Close callback for GEOM-backed swap devices: detach the consumer from
 * the swdevt under sw_dev_mtx and, if there was one, run
 * swapgeom_close_ev() through the GEOM event queue and wait for it.
 */
static void
swapgeom_close(struct thread *td, struct swdevt *sw)
{
	struct g_consumer *cp;

	mtx_lock(&sw_dev_mtx);
	cp = sw->sw_id;
	sw->sw_id = NULL;
	mtx_unlock(&sw_dev_mtx);
	/* XXX: direct call when Giant untangled */
	if (cp != NULL)
		g_waitfor_event(swapgeom_close_ev, cp, M_WAITOK, NULL);
}


/* Argument/result bundle passed from swapongeom() to swapongeom_ev(). */
struct swh0h0 {
	struct cdev *dev;	/* device to swap on */
	struct vnode *vp;	/* vnode the device was opened through */
	int	error;		/* result reported back to the caller */
};

/*
 * GEOM event handler that attaches a new consumer to the provider behind
 * swh->dev and registers it as a swap device.  The outcome is reported in
 * swh->error: ENODEV when the device has no provider, EBUSY when an
 * existing swap device already uses that provider, or the g_access()
 * error.
 */
static void
swapongeom_ev(void *arg, int flags)
{
	struct swh0h0 *swh;
	struct g_provider *pp;
	struct g_consumer *cp;
	/* Single "swap" geom, created on first use and then reused. */
	static struct g_geom *gp;
	struct swdevt *sp;
	u_long nblks;
	int error;

	swh = arg;
	swh->error = 0;
	pp = g_dev_getprovider(swh->dev);
	if (pp == NULL) {
		swh->error = ENODEV;
		return;
	}
	/* Refuse a provider that is already in use for swap. */
	mtx_lock(&sw_dev_mtx);
	TAILQ_FOREACH(sp, &swtailq, sw_list) {
		cp = sp->sw_id;
		if (cp != NULL && cp->provider == pp) {
			mtx_unlock(&sw_dev_mtx);
			swh->error = EBUSY;
			return;
		}
	}
	mtx_unlock(&sw_dev_mtx);
	if (gp == NULL)
		gp = g_new_geomf(&g_swap_class, "swap");
	cp = g_new_consumer(gp);
	cp->index = 1;		/* Number of active I/Os, plus one for being active. */
	cp->flags |=  G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	g_attach(cp, pp);
	/*
	 * XXX: Everytime you think you can improve the margin for
	 * footshooting, somebody depends on the ability to do so:
	 * savecore(8) wants to write to our swapdev so we cannot
	 * set an exclusive count :-(
	 */
	error = g_access(cp, 1, 1, 0);
	if (error) {
		g_detach(cp);
		g_destroy_consumer(cp);
		swh->error = error;
		return;
	}
	/* Provider size in DEV_BSIZE units, as swaponsomething() expects. */
	nblks = pp->mediasize / DEV_BSIZE;
	swaponsomething(swh->vp, cp, nblks, swapgeom_strategy,
	    swapgeom_close, dev2udev(swh->dev),
	    (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ?
SW_UNMAPPED : 0);
	swh->error = 0;
}

/*
 * Enable swapping on the disk device behind 'vp'.  The vnode is locked
 * exclusively while swapongeom_ev() runs through the GEOM event queue.
 * Returns the g_waitfor_event() error if non-zero, otherwise the error
 * reported by swapongeom_ev().
 */
static int
swapongeom(struct thread *td, struct vnode *vp)
{
	int error;
	struct swh0h0 swh;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	swh.dev = vp->v_rdev;
	swh.vp = vp;
	swh.error = 0;
	/* XXX: direct call when Giant untangled */
	error = g_waitfor_event(swapongeom_ev, &swh, M_WAITOK, NULL);
	if (!error)
		error = swh.error;
	VOP_UNLOCK(vp, 0);
	return (error);
}

/*
 * VNODE backend
 *
 * This is used mainly for network filesystem (read: probably only tested
 * with NFS) swapfiles.
 *
 */

/*
 * Strategy callback for vnode-backed swap: rewrite the buf so it targets
 * the backing vnode (stored in sp->sw_id) and hand it to bstrategy().
 */
static void
swapdev_strategy(struct buf *bp, struct swdevt *sp)
{
	struct vnode *vp2;

	/* Device-relative page number converted to DEV_BSIZE blocks. */
	bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first);

	vp2 = sp->sw_id;
	vhold(vp2);
	if (bp->b_iocmd == BIO_WRITE) {
		/* Move the write-in-progress count to the backing vnode. */
		if (bp->b_bufobj)
			bufobj_wdrop(bp->b_bufobj);
		bufobj_wref(&vp2->v_bufobj);
	}
	if (bp->b_bufobj != &vp2->v_bufobj)
		bp->b_bufobj = &vp2->v_bufobj;
	bp->b_vp = vp2;
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);
	return;
}

/* Close callback for vnode-backed swap: close and release sp->sw_vp. */
static void
swapdev_close(struct thread *td, struct swdevt *sp)
{

	VOP_CLOSE(sp->sw_vp, FREAD | FWRITE, td->td_ucred, td);
	vrele(sp->sw_vp);
}


/*
 * Enable swapping on a regular-file vnode of 'nblks' DEV_BSIZE blocks.
 * Returns ENXIO for an empty file, EBUSY if the vnode is already a swap
 * device, or the error from the MAC check (when configured) or VOP_OPEN().
 */
static int
swaponvp(struct thread *td, struct vnode *vp, u_long nblks)
{
	struct swdevt *sp;
	int error;

	if (nblks == 0)
		return (ENXIO);
	mtx_lock(&sw_dev_mtx);
	TAILQ_FOREACH(sp, &swtailq, sw_list) {
		if (sp->sw_id == vp) {
			mtx_unlock(&sw_dev_mtx);
			return (EBUSY);
		}
	}
	mtx_unlock(&sw_dev_mtx);

	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	/* With MAC, VOP_OPEN() below only runs if the check passes. */
	error = mac_system_check_swapon(td->td_ucred, vp);
	if (error == 0)
#endif
		error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, NULL);
	(void) VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close,
	    NODEV, 0);
	return (0);
}

/*
 * Sysctl handler for changing nsw_wcount_async_max.  A new value is
 * accepted only in the range [1, nswbuf / 2]; otherwise EINVAL.
 */
static int
sysctl_swap_async_max(SYSCTL_HANDLER_ARGS)
{
	int error, new, n;

	new = nsw_wcount_async_max;
	error = sysctl_handle_int(oidp, &new, 0, req);
	/* A read-only request (no new value) stops here. */
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (new > nswbuf / 2 || new < 1)
		return (EINVAL);

	mtx_lock(&pbuf_mtx);
	while (nsw_wcount_async_max
!= new) {
		/*
		 * Adjust difference.  If the current async count is too low,
		 * we will need to squeeze our update slowly in.  Sleep with a
		 * higher priority than getpbuf() to finish faster.
		 */
		n = new - nsw_wcount_async_max;
		if (nsw_wcount_async + n >= 0) {
			/* The whole delta fits: apply it and wake waiters. */
			nsw_wcount_async += n;
			nsw_wcount_async_max += n;
			wakeup(&nsw_wcount_async);
		} else {
			/*
			 * Shrinking by more than is currently free: take
			 * what is available now and sleep for the rest.
			 */
			nsw_wcount_async_max -= nsw_wcount_async;
			nsw_wcount_async = 0;
			msleep(&nsw_wcount_async, &pbuf_mtx, PSWP,
			    "swpsysctl", 0);
		}
	}
	mtx_unlock(&pbuf_mtx);

	return (0);
}
Index: projects/clang370-import/sys/vm/vm_kern.c
===================================================================
--- projects/clang370-import/sys/vm/vm_kern.c	(revision 288925)
+++ projects/clang370-import/sys/vm/vm_kern.c	(revision 288926)
@@ -1,549 +1,549 @@
 /*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Kernel memory management. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include /* for ticks and hz */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include vm_map_t kernel_map; vm_map_t exec_map; vm_map_t pipe_map; const void *zero_region; CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, #if defined(__arm__) || defined(__sparc64__) &vm_max_kernel_address, 0, #else SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS, #endif "Max kernel address"); /* * kva_alloc: * * Allocate a virtual address range with no underlying object and * no initial mapping to physical memory. Any mapping from this * range to physical memory must be explicitly created prior to * its use, typically with pmap_qenter(). Any attempt to create * a mapping on demand through vm_fault() will result in a panic. */ vm_offset_t kva_alloc(size) vm_size_t size; { vm_offset_t addr; size = round_page(size); if (vmem_alloc(kernel_arena, size, M_BESTFIT | M_NOWAIT, &addr)) return (0); return (addr); } /* * kva_free: * * Release a region of kernel virtual memory allocated * with kva_alloc, and return the physical pages * associated with that region. * * This routine may not block on kernel maps. */ void kva_free(addr, size) vm_offset_t addr; vm_size_t size; { size = round_page(size); vmem_free(kernel_arena, addr, size); } /* * Allocates a region from the kernel address map and physical pages * within the specified address range to the kernel object. Creates a * wired mapping from this region to these pages, and returns the * region's starting virtual address. The allocated pages are not * necessarily physically contiguous. If M_ZERO is specified through the * given flags, then the pages are zeroed before they are mapped. 
*/
/*
 * Parameters:
 *	vmem		arena to take the virtual range from; kmem_arena
 *			selects kmem_object, anything else kernel_object
 *	size		bytes requested, rounded up to whole pages
 *	flags		malloc(9)-style flags (M_NOWAIT, M_ZERO, ...)
 *	low, high	physical address bounds for the pages
 *	memattr		memory attribute for the pages
 *
 * Returns the starting virtual address, or 0 on failure.
 */
vm_offset_t
kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, vm_memattr_t memattr)
{
	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
	vm_offset_t addr, i;
	vm_ooffset_t offset;
	vm_page_t m;
	int pflags, tries;

	size = round_page(size);
	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
		return (0);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
	VM_OBJECT_WLOCK(object);
	/* Allocate and map one page at a time. */
	for (i = 0; i < size; i += PAGE_SIZE) {
		tries = 0;
retry:
		m = vm_page_alloc_contig(object, OFF_TO_IDX(offset + i),
		    pflags, 1, low, high, PAGE_SIZE, 0, memattr);
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(object);
			/* One retry with M_NOWAIT, three otherwise. */
			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
				vm_pageout_grow_cache(tries, low, high);
				VM_OBJECT_WLOCK(object);
				tries++;
				goto retry;
			}
			/* Undo the 'i' bytes already backed, then the range. */
			kmem_unback(object, addr, i);
			vmem_free(vmem, addr, size);
			return (0);
		}
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL,
		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
	}
	VM_OBJECT_WUNLOCK(object);
	return (addr);
}

/*
 *	Allocates a region from the kernel address map and physically
 *	contiguous pages within the specified address range to the kernel
 *	object.  Creates a wired mapping from this region to these pages, and
 *	returns the region's starting virtual address.  If M_ZERO is specified
 *	through the given flags, then the pages are zeroed before they are
 *	mapped.
 *
 *	Returns 0 when either the virtual range or the pages cannot be
 *	allocated.
 */
vm_offset_t
kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
    vm_memattr_t memattr)
{
	vm_object_t object = vmem == kmem_arena ?
kmem_object : kernel_object;
	vm_offset_t addr, tmp;
	vm_ooffset_t offset;
	vm_page_t end_m, m;
	int pflags, tries;

	size = round_page(size);
	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
		return (0);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
	VM_OBJECT_WLOCK(object);
	tries = 0;
retry:
	/* A single request covers every page of the range. */
	m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
	    atop(size), low, high, alignment, boundary, memattr);
	if (m == NULL) {
		VM_OBJECT_WUNLOCK(object);
		/* One retry with M_NOWAIT, three otherwise. */
		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
			vm_pageout_grow_cache(tries, low, high);
			VM_OBJECT_WLOCK(object);
			tries++;
			goto retry;
		}
		vmem_free(vmem, addr, size);
		return (0);
	}
	/* The pages are walked as consecutive vm_page entries from m. */
	end_m = m + atop(size);
	tmp = addr;
	for (; m < end_m; m++) {
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		pmap_enter(kernel_pmap, tmp, m, VM_PROT_ALL,
		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
		tmp += PAGE_SIZE;
	}
	VM_OBJECT_WUNLOCK(object);
	return (addr);
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	superpage_align	Request that min is superpage aligned
 *
 *	Every failure panics; the function never returns NULL.
 */
vm_map_t
kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
    vm_size_t size, boolean_t superpage_align)
{
	int ret;
	vm_map_t result;

	size = round_page(size);

	/* Start the search at the bottom of the parent map. */
	*min = vm_map_min(parent);
	ret = vm_map_find(parent, NULL, 0, min, size, 0, superpage_align ?
VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_ACC_NO_CHARGE);
	if (ret != KERN_SUCCESS)
		panic("kmem_suballoc: bad status return of %d", ret);
	*max = *min + size;
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}

/*
 *	kmem_malloc:
 *
 *	Allocate wired-down pages in the kernel's address space.
 *
 *	The size is rounded up to whole pages.  Returns the starting
 *	virtual address, or 0 if either the address range or the backing
 *	pages could not be obtained; in the latter case the address range
 *	is returned to the arena.
 */
vm_offset_t
kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
{
	vm_offset_t addr;
	int rv;

	size = round_page(size);
	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
		return (0);

	rv = kmem_back((vmem == kmem_arena) ? kmem_object : kernel_object,
	    addr, size, flags);
	if (rv != KERN_SUCCESS) {
		vmem_free(vmem, addr, size);
		return (0);
	}
	return (addr);
}

/*
 *	kmem_back:
 *
 *	Allocate physical pages for the specified virtual address range.
 *
 *	Only kmem_object and kernel_object are accepted (asserted).
 *	Returns KERN_SUCCESS, or KERN_NO_SPACE when a page allocation
 *	fails and M_NOWAIT is set; without M_NOWAIT the allocation is
 *	retried after VM_WAIT.
 */
int
kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
{
	vm_offset_t offset, i;
	vm_page_t m;
	int pflags;

	KASSERT(object == kmem_object || object == kernel_object,
	    ("kmem_back: only supports kernel objects."));

	offset = addr - VM_MIN_KERNEL_ADDRESS;
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;

	VM_OBJECT_WLOCK(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(object, OFF_TO_IDX(offset + i), pflags);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
*/
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(object);
			/* Sleeping callers wait for memory and try again. */
			if ((flags & M_NOWAIT) == 0) {
				VM_WAIT;
				VM_OBJECT_WLOCK(object);
				goto retry;
			}
			/* Release the 'i' bytes that were already backed. */
			kmem_unback(object, addr, i);
			return (KERN_NO_SPACE);
		}
		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
		    ("kmem_malloc: page %p is managed", m));
		m->valid = VM_PAGE_BITS_ALL;
		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL,
		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
	}
	VM_OBJECT_WUNLOCK(object);

	return (KERN_SUCCESS);
}

/*
 *	kmem_unback:
 *
 *	Unmap and free the physical pages underlying the specified virtual
 *	address range.
 *
 *	A physical page must exist within the specified object at each index
 *	that is being unmapped.
 *
 *	Only kmem_object and kernel_object are accepted (asserted).  The
 *	result of vm_page_lookup() is used without a NULL check, which is
 *	why every index must be populated.
 */
void
kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
{
	vm_page_t m;
	vm_offset_t i, offset;

	KASSERT(object == kmem_object || object == kernel_object,
	    ("kmem_unback: only supports kernel objects."));

	/* Remove the mappings first, then release the pages. */
	pmap_remove(kernel_pmap, addr, addr + size);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	VM_OBJECT_WLOCK(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
-		vm_page_unwire(m, PQ_INACTIVE);
+		vm_page_unwire(m, PQ_NONE);
		vm_page_free(m);
	}
	VM_OBJECT_WUNLOCK(object);
}

/*
 *	kmem_free:
 *
 *	Free memory allocated with kmem_malloc.  The size must match the
 *	original allocation.
 *
 *	The size is rounded up to whole pages, the backing pages are
 *	released with kmem_unback(), and the address range is returned to
 *	the arena.
 */
void
kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
{

	size = round_page(size);
	kmem_unback((vmem == kmem_arena) ? kmem_object : kernel_object,
	    addr, size);
	vmem_free(vmem, addr, size);
}

/*
 *	kmap_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 *	This routine may block.
*/ vm_offset_t kmap_alloc_wait(map, size) vm_map_t map; vm_size_t size; { vm_offset_t addr; size = round_page(size); if (!swap_reserve(size)) return (0); for (;;) { /* * To make this work for more than one map, use the map's lock * to lock out sleepers/wakers. */ vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0) break; /* no space now; see if we can ever get space */ if (vm_map_max(map) - vm_map_min(map) < size) { vm_map_unlock(map); swap_release(size); return (0); } map->needs_wakeup = TRUE; vm_map_unlock_and_wait(map, 0); } vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, MAP_ACC_CHARGED); vm_map_unlock(map); return (addr); } /* * kmap_free_wakeup: * * Returns memory to a submap of the kernel, and wakes up any processes * waiting for memory in that map. */ void kmap_free_wakeup(map, addr, size) vm_map_t map; vm_offset_t addr; vm_size_t size; { vm_map_lock(map); (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); if (map->needs_wakeup) { map->needs_wakeup = FALSE; vm_map_wakeup(map); } vm_map_unlock(map); } void kmem_init_zero_region(void) { vm_offset_t addr, i; vm_page_t m; /* * Map a single physical page of zeros to a larger virtual range. * This requires less looping in places that want large amounts of * zeros, while not using much more physical resources. */ addr = kva_alloc(ZERO_REGION_SIZE); m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE) pmap_qenter(addr + i, &m, 1); pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ); zero_region = (const void *)addr; } /* * kmem_init: * * Create the kernel map; insert a mapping covering kernel text, * data, bss, and all space allocated thus far (`boostrap' data). 
The * new map will thus map the range between VM_MIN_KERNEL_ADDRESS and * `start' as allocated, and the range between `start' and `end' as free. */ void kmem_init(start, end) vm_offset_t start, end; { vm_map_t m; m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end); m->system_map = 1; vm_map_lock(m); /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ kernel_map = m; (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, #ifdef __amd64__ KERNBASE, #else VM_MIN_KERNEL_ADDRESS, #endif start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); } #ifdef DIAGNOSTIC /* * Allow userspace to directly trigger the VM drain routine for testing * purposes. */ static int debug_vm_lowmem(SYSCTL_HANDLER_ARGS) { int error, i; i = 0; error = sysctl_handle_int(oidp, &i, 0, req); if (error) return (error); if (i) EVENTHANDLER_INVOKE(vm_lowmem, 0); return (0); } SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0, debug_vm_lowmem, "I", "set to trigger vm_lowmem event"); #endif Index: projects/clang370-import/sys/xen/interface/mem_event.h =================================================================== --- projects/clang370-import/sys/xen/interface/mem_event.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/mem_event.h (nonexistent) @@ -1,80 +0,0 @@ -/****************************************************************************** - * mem_event.h - * - * Memory event common structures. - * - * Copyright (c) 2009 by Citrix Systems, Inc. 
(Patrick Colp) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef _XEN_PUBLIC_MEM_EVENT_H -#define _XEN_PUBLIC_MEM_EVENT_H - -#include "xen.h" -#include "io/ring.h" - -/* Memory event flags */ -#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0) -#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1) -#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2) -#define MEM_EVENT_FLAG_FOREIGN (1 << 3) -#define MEM_EVENT_FLAG_DUMMY (1 << 4) - -/* Reasons for the memory event request */ -#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */ -#define MEM_EVENT_REASON_VIOLATION 1 /* access violation, GFN is address */ -#define MEM_EVENT_REASON_CR0 2 /* CR0 was hit: gfn is CR0 value */ -#define MEM_EVENT_REASON_CR3 3 /* CR3 was hit: gfn is CR3 value */ -#define MEM_EVENT_REASON_CR4 4 /* CR4 was hit: gfn is CR4 value */ -#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ -#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ - -typedef struct mem_event_st { - uint32_t flags; - uint32_t vcpu_id; - - uint64_t gfn; - uint64_t offset; - uint64_t gla; /* if gla_valid */ - - uint32_t p2mt; - - uint16_t access_r:1; - uint16_t access_w:1; - uint16_t access_x:1; - uint16_t gla_valid:1; - uint16_t available:12; - - uint16_t reason; -} mem_event_request_t, mem_event_response_t; - -DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t); - -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ Property changes on: projects/clang370-import/sys/xen/interface/mem_event.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/acm_ops.h =================================================================== --- 
projects/clang370-import/sys/xen/interface/acm_ops.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/acm_ops.h (nonexistent) @@ -1,159 +0,0 @@ -/* - * acm_ops.h: Xen access control module hypervisor commands - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Reiner Sailer - * Copyright (c) 2005,2006 International Business Machines Corporation. - */ - -#ifndef __XEN_PUBLIC_ACM_OPS_H__ -#define __XEN_PUBLIC_ACM_OPS_H__ - -#include "xen.h" -#include "acm.h" - -/* - * Make sure you increment the interface version whenever you modify this file! - * This makes sure that old versions of acm tools will stop working in a - * well-defined way (rather than crashing the machine, for instance). - */ -#define ACM_INTERFACE_VERSION 0xAAAA000A - -/************************************************************************/ - -/* - * Prototype for this hypercall is: - * int acm_op(int cmd, void *args) - * @cmd == ACMOP_??? 
(access control module operation). - * @args == Operation-specific extra arguments (NULL if none). - */ - - -#define ACMOP_setpolicy 1 -struct acm_setpolicy { - /* IN */ - XEN_GUEST_HANDLE_64(void) pushcache; - uint32_t pushcache_size; -}; - - -#define ACMOP_getpolicy 2 -struct acm_getpolicy { - /* IN */ - XEN_GUEST_HANDLE_64(void) pullcache; - uint32_t pullcache_size; -}; - - -#define ACMOP_dumpstats 3 -struct acm_dumpstats { - /* IN */ - XEN_GUEST_HANDLE_64(void) pullcache; - uint32_t pullcache_size; -}; - - -#define ACMOP_getssid 4 -#define ACM_GETBY_ssidref 1 -#define ACM_GETBY_domainid 2 -struct acm_getssid { - /* IN */ - uint32_t get_ssid_by; /* ACM_GETBY_* */ - union { - domaintype_t domainid; - ssidref_t ssidref; - } id; - XEN_GUEST_HANDLE_64(void) ssidbuf; - uint32_t ssidbuf_size; -}; - -#define ACMOP_getdecision 5 -struct acm_getdecision { - /* IN */ - uint32_t get_decision_by1; /* ACM_GETBY_* */ - uint32_t get_decision_by2; /* ACM_GETBY_* */ - union { - domaintype_t domainid; - ssidref_t ssidref; - } id1; - union { - domaintype_t domainid; - ssidref_t ssidref; - } id2; - uint32_t hook; - /* OUT */ - uint32_t acm_decision; -}; - - -#define ACMOP_chgpolicy 6 -struct acm_change_policy { - /* IN */ - XEN_GUEST_HANDLE_64(void) policy_pushcache; - uint32_t policy_pushcache_size; - XEN_GUEST_HANDLE_64(void) del_array; - uint32_t delarray_size; - XEN_GUEST_HANDLE_64(void) chg_array; - uint32_t chgarray_size; - /* OUT */ - /* array with error code */ - XEN_GUEST_HANDLE_64(void) err_array; - uint32_t errarray_size; -}; - -#define ACMOP_relabeldoms 7 -struct acm_relabel_doms { - /* IN */ - XEN_GUEST_HANDLE_64(void) relabel_map; - uint32_t relabel_map_size; - /* OUT */ - XEN_GUEST_HANDLE_64(void) err_array; - uint32_t errarray_size; -}; - -/* future interface to Xen */ -struct xen_acmctl { - uint32_t cmd; - uint32_t interface_version; - union { - struct acm_setpolicy setpolicy; - struct acm_getpolicy getpolicy; - struct acm_dumpstats dumpstats; - struct acm_getssid 
getssid; - struct acm_getdecision getdecision; - struct acm_change_policy change_policy; - struct acm_relabel_doms relabel_doms; - } u; -}; - -typedef struct xen_acmctl xen_acmctl_t; -DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t); - -#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ Property changes on: projects/clang370-import/sys/xen/interface/acm_ops.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/elfstructs.h =================================================================== --- projects/clang370-import/sys/xen/interface/elfstructs.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/elfstructs.h (nonexistent) @@ -1,527 +0,0 @@ -#ifndef __XEN_PUBLIC_ELFSTRUCTS_H__ -#define __XEN_PUBLIC_ELFSTRUCTS_H__ 1 -/* - * Copyright (c) 1995, 1996 Erik Theisen. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -typedef uint8_t Elf_Byte; - -typedef uint32_t Elf32_Addr; /* Unsigned program address */ -typedef uint32_t Elf32_Off; /* Unsigned file offset */ -typedef int32_t Elf32_Sword; /* Signed large integer */ -typedef uint32_t Elf32_Word; /* Unsigned large integer */ -typedef uint16_t Elf32_Half; /* Unsigned medium integer */ - -typedef uint64_t Elf64_Addr; -typedef uint64_t Elf64_Off; -typedef int32_t Elf64_Shalf; - -typedef int32_t Elf64_Sword; -typedef uint32_t Elf64_Word; - -typedef int64_t Elf64_Sxword; -typedef uint64_t Elf64_Xword; - -typedef uint32_t Elf64_Half; -typedef uint16_t Elf64_Quarter; - -/* - * e_ident[] identification indexes - * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html - */ -#define EI_MAG0 0 /* file ID */ -#define EI_MAG1 1 /* file ID */ -#define EI_MAG2 2 /* file ID */ -#define EI_MAG3 3 /* file ID */ -#define EI_CLASS 4 /* file class */ -#define EI_DATA 5 /* data encoding */ -#define EI_VERSION 6 /* ELF header version */ -#define EI_OSABI 7 /* OS/ABI ID */ -#define EI_ABIVERSION 8 /* ABI version */ -#define EI_PAD 9 /* start of pad bytes */ -#define EI_NIDENT 16 /* Size of e_ident[] */ - -/* e_ident[] magic number */ -#define ELFMAG0 0x7f /* e_ident[EI_MAG0] */ -#define ELFMAG1 'E' /* e_ident[EI_MAG1] */ -#define ELFMAG2 'L' /* e_ident[EI_MAG2] */ -#define ELFMAG3 'F' /* e_ident[EI_MAG3] */ -#define ELFMAG "\177ELF" /* magic */ -#define SELFMAG 4 /* size of magic */ - -/* e_ident[] file class */ -#define 
ELFCLASSNONE 0 /* invalid */ -#define ELFCLASS32 1 /* 32-bit objs */ -#define ELFCLASS64 2 /* 64-bit objs */ -#define ELFCLASSNUM 3 /* number of classes */ - -/* e_ident[] data encoding */ -#define ELFDATANONE 0 /* invalid */ -#define ELFDATA2LSB 1 /* Little-Endian */ -#define ELFDATA2MSB 2 /* Big-Endian */ -#define ELFDATANUM 3 /* number of data encode defines */ - -/* e_ident[] Operating System/ABI */ -#define ELFOSABI_SYSV 0 /* UNIX System V ABI */ -#define ELFOSABI_HPUX 1 /* HP-UX operating system */ -#define ELFOSABI_NETBSD 2 /* NetBSD */ -#define ELFOSABI_LINUX 3 /* GNU/Linux */ -#define ELFOSABI_HURD 4 /* GNU/Hurd */ -#define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */ -#define ELFOSABI_SOLARIS 6 /* Solaris */ -#define ELFOSABI_MONTEREY 7 /* Monterey */ -#define ELFOSABI_IRIX 8 /* IRIX */ -#define ELFOSABI_FREEBSD 9 /* FreeBSD */ -#define ELFOSABI_TRU64 10 /* TRU64 UNIX */ -#define ELFOSABI_MODESTO 11 /* Novell Modesto */ -#define ELFOSABI_OPENBSD 12 /* OpenBSD */ -#define ELFOSABI_ARM 97 /* ARM */ -#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ - -/* e_ident */ -#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \ - (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \ - (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \ - (ehdr).e_ident[EI_MAG3] == ELFMAG3) - -/* ELF Header */ -typedef struct elfhdr { - unsigned char e_ident[EI_NIDENT]; /* ELF Identification */ - Elf32_Half e_type; /* object file type */ - Elf32_Half e_machine; /* machine */ - Elf32_Word e_version; /* object file version */ - Elf32_Addr e_entry; /* virtual entry point */ - Elf32_Off e_phoff; /* program header table offset */ - Elf32_Off e_shoff; /* section header table offset */ - Elf32_Word e_flags; /* processor-specific flags */ - Elf32_Half e_ehsize; /* ELF header size */ - Elf32_Half e_phentsize; /* program header entry size */ - Elf32_Half e_phnum; /* number of program header entries */ - Elf32_Half e_shentsize; /* section header entry size */ - Elf32_Half e_shnum; /* 
number of section header entries */ - Elf32_Half e_shstrndx; /* section header table's "section - header string table" entry offset */ -} Elf32_Ehdr; - -typedef struct { - unsigned char e_ident[EI_NIDENT]; /* Id bytes */ - Elf64_Quarter e_type; /* file type */ - Elf64_Quarter e_machine; /* machine type */ - Elf64_Half e_version; /* version number */ - Elf64_Addr e_entry; /* entry point */ - Elf64_Off e_phoff; /* Program hdr offset */ - Elf64_Off e_shoff; /* Section hdr offset */ - Elf64_Half e_flags; /* Processor flags */ - Elf64_Quarter e_ehsize; /* sizeof ehdr */ - Elf64_Quarter e_phentsize; /* Program header entry size */ - Elf64_Quarter e_phnum; /* Number of program headers */ - Elf64_Quarter e_shentsize; /* Section header entry size */ - Elf64_Quarter e_shnum; /* Number of section headers */ - Elf64_Quarter e_shstrndx; /* String table index */ -} Elf64_Ehdr; - -/* e_type */ -#define ET_NONE 0 /* No file type */ -#define ET_REL 1 /* relocatable file */ -#define ET_EXEC 2 /* executable file */ -#define ET_DYN 3 /* shared object file */ -#define ET_CORE 4 /* core file */ -#define ET_NUM 5 /* number of types */ -#define ET_LOPROC 0xff00 /* reserved range for processor */ -#define ET_HIPROC 0xffff /* specific e_type */ - -/* e_machine */ -#define EM_NONE 0 /* No Machine */ -#define EM_M32 1 /* AT&T WE 32100 */ -#define EM_SPARC 2 /* SPARC */ -#define EM_386 3 /* Intel 80386 */ -#define EM_68K 4 /* Motorola 68000 */ -#define EM_88K 5 /* Motorola 88000 */ -#define EM_486 6 /* Intel 80486 - unused? 
*/ -#define EM_860 7 /* Intel 80860 */ -#define EM_MIPS 8 /* MIPS R3000 Big-Endian only */ -/* - * Don't know if EM_MIPS_RS4_BE, - * EM_SPARC64, EM_PARISC, - * or EM_PPC are ABI compliant - */ -#define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */ -#define EM_SPARC64 11 /* SPARC v9 64-bit unoffical */ -#define EM_PARISC 15 /* HPPA */ -#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */ -#define EM_PPC 20 /* PowerPC */ -#define EM_PPC64 21 /* PowerPC 64-bit */ -#define EM_ARM 40 /* Advanced RISC Machines ARM */ -#define EM_ALPHA 41 /* DEC ALPHA */ -#define EM_SPARCV9 43 /* SPARC version 9 */ -#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */ -#define EM_IA_64 50 /* Intel Merced */ -#define EM_X86_64 62 /* AMD x86-64 architecture */ -#define EM_VAX 75 /* DEC VAX */ - -/* Version */ -#define EV_NONE 0 /* Invalid */ -#define EV_CURRENT 1 /* Current */ -#define EV_NUM 2 /* number of versions */ - -/* Section Header */ -typedef struct { - Elf32_Word sh_name; /* name - index into section header - string table section */ - Elf32_Word sh_type; /* type */ - Elf32_Word sh_flags; /* flags */ - Elf32_Addr sh_addr; /* address */ - Elf32_Off sh_offset; /* file offset */ - Elf32_Word sh_size; /* section size */ - Elf32_Word sh_link; /* section header table index link */ - Elf32_Word sh_info; /* extra information */ - Elf32_Word sh_addralign; /* address alignment */ - Elf32_Word sh_entsize; /* section entry size */ -} Elf32_Shdr; - -typedef struct { - Elf64_Half sh_name; /* section name */ - Elf64_Half sh_type; /* section type */ - Elf64_Xword sh_flags; /* section flags */ - Elf64_Addr sh_addr; /* virtual address */ - Elf64_Off sh_offset; /* file offset */ - Elf64_Xword sh_size; /* section size */ - Elf64_Half sh_link; /* link to another */ - Elf64_Half sh_info; /* misc info */ - Elf64_Xword sh_addralign; /* memory alignment */ - Elf64_Xword sh_entsize; /* table entry size */ -} Elf64_Shdr; - -/* Special Section Indexes */ -#define SHN_UNDEF 0 /* undefined */ -#define 
SHN_LORESERVE 0xff00 /* lower bounds of reserved indexes */ -#define SHN_LOPROC 0xff00 /* reserved range for processor */ -#define SHN_HIPROC 0xff1f /* specific section indexes */ -#define SHN_ABS 0xfff1 /* absolute value */ -#define SHN_COMMON 0xfff2 /* common symbol */ -#define SHN_HIRESERVE 0xffff /* upper bounds of reserved indexes */ - -/* sh_type */ -#define SHT_NULL 0 /* inactive */ -#define SHT_PROGBITS 1 /* program defined information */ -#define SHT_SYMTAB 2 /* symbol table section */ -#define SHT_STRTAB 3 /* string table section */ -#define SHT_RELA 4 /* relocation section with addends*/ -#define SHT_HASH 5 /* symbol hash table section */ -#define SHT_DYNAMIC 6 /* dynamic section */ -#define SHT_NOTE 7 /* note section */ -#define SHT_NOBITS 8 /* no space section */ -#define SHT_REL 9 /* relation section without addends */ -#define SHT_SHLIB 10 /* reserved - purpose unknown */ -#define SHT_DYNSYM 11 /* dynamic symbol table section */ -#define SHT_NUM 12 /* number of section types */ -#define SHT_LOPROC 0x70000000 /* reserved range for processor */ -#define SHT_HIPROC 0x7fffffff /* specific section header types */ -#define SHT_LOUSER 0x80000000 /* reserved range for application */ -#define SHT_HIUSER 0xffffffff /* specific indexes */ - -/* Section names */ -#define ELF_BSS ".bss" /* uninitialized data */ -#define ELF_DATA ".data" /* initialized data */ -#define ELF_DEBUG ".debug" /* debug */ -#define ELF_DYNAMIC ".dynamic" /* dynamic linking information */ -#define ELF_DYNSTR ".dynstr" /* dynamic string table */ -#define ELF_DYNSYM ".dynsym" /* dynamic symbol table */ -#define ELF_FINI ".fini" /* termination code */ -#define ELF_GOT ".got" /* global offset table */ -#define ELF_HASH ".hash" /* symbol hash table */ -#define ELF_INIT ".init" /* initialization code */ -#define ELF_REL_DATA ".rel.data" /* relocation data */ -#define ELF_REL_FINI ".rel.fini" /* relocation termination code */ -#define ELF_REL_INIT ".rel.init" /* relocation initialization code */ 
-#define ELF_REL_DYN ".rel.dyn" /* relocaltion dynamic link info */ -#define ELF_REL_RODATA ".rel.rodata" /* relocation read-only data */ -#define ELF_REL_TEXT ".rel.text" /* relocation code */ -#define ELF_RODATA ".rodata" /* read-only data */ -#define ELF_SHSTRTAB ".shstrtab" /* section header string table */ -#define ELF_STRTAB ".strtab" /* string table */ -#define ELF_SYMTAB ".symtab" /* symbol table */ -#define ELF_TEXT ".text" /* code */ - - -/* Section Attribute Flags - sh_flags */ -#define SHF_WRITE 0x1 /* Writable */ -#define SHF_ALLOC 0x2 /* occupies memory */ -#define SHF_EXECINSTR 0x4 /* executable */ -#define SHF_MASKPROC 0xf0000000 /* reserved bits for processor */ - /* specific section attributes */ - -/* Symbol Table Entry */ -typedef struct elf32_sym { - Elf32_Word st_name; /* name - index into string table */ - Elf32_Addr st_value; /* symbol value */ - Elf32_Word st_size; /* symbol size */ - unsigned char st_info; /* type and binding */ - unsigned char st_other; /* 0 - no defined meaning */ - Elf32_Half st_shndx; /* section header index */ -} Elf32_Sym; - -typedef struct { - Elf64_Half st_name; /* Symbol name index in str table */ - Elf_Byte st_info; /* type / binding attrs */ - Elf_Byte st_other; /* unused */ - Elf64_Quarter st_shndx; /* section index of symbol */ - Elf64_Xword st_value; /* value of symbol */ - Elf64_Xword st_size; /* size of symbol */ -} Elf64_Sym; - -/* Symbol table index */ -#define STN_UNDEF 0 /* undefined */ - -/* Extract symbol info - st_info */ -#define ELF32_ST_BIND(x) ((x) >> 4) -#define ELF32_ST_TYPE(x) (((unsigned int) x) & 0xf) -#define ELF32_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf)) - -#define ELF64_ST_BIND(x) ((x) >> 4) -#define ELF64_ST_TYPE(x) (((unsigned int) x) & 0xf) -#define ELF64_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf)) - -/* Symbol Binding - ELF32_ST_BIND - st_info */ -#define STB_LOCAL 0 /* Local symbol */ -#define STB_GLOBAL 1 /* Global symbol */ -#define STB_WEAK 2 /* like global - lower precedence */ 
-#define STB_NUM 3 /* number of symbol bindings */ -#define STB_LOPROC 13 /* reserved range for processor */ -#define STB_HIPROC 15 /* specific symbol bindings */ - -/* Symbol type - ELF32_ST_TYPE - st_info */ -#define STT_NOTYPE 0 /* not specified */ -#define STT_OBJECT 1 /* data object */ -#define STT_FUNC 2 /* function */ -#define STT_SECTION 3 /* section */ -#define STT_FILE 4 /* file */ -#define STT_NUM 5 /* number of symbol types */ -#define STT_LOPROC 13 /* reserved range for processor */ -#define STT_HIPROC 15 /* specific symbol types */ - -/* Relocation entry with implicit addend */ -typedef struct { - Elf32_Addr r_offset; /* offset of relocation */ - Elf32_Word r_info; /* symbol table index and type */ -} Elf32_Rel; - -/* Relocation entry with explicit addend */ -typedef struct { - Elf32_Addr r_offset; /* offset of relocation */ - Elf32_Word r_info; /* symbol table index and type */ - Elf32_Sword r_addend; -} Elf32_Rela; - -/* Extract relocation info - r_info */ -#define ELF32_R_SYM(i) ((i) >> 8) -#define ELF32_R_TYPE(i) ((unsigned char) (i)) -#define ELF32_R_INFO(s,t) (((s) << 8) + (unsigned char)(t)) - -typedef struct { - Elf64_Xword r_offset; /* where to do it */ - Elf64_Xword r_info; /* index & type of relocation */ -} Elf64_Rel; - -typedef struct { - Elf64_Xword r_offset; /* where to do it */ - Elf64_Xword r_info; /* index & type of relocation */ - Elf64_Sxword r_addend; /* adjustment value */ -} Elf64_Rela; - -#define ELF64_R_SYM(info) ((info) >> 32) -#define ELF64_R_TYPE(info) ((info) & 0xFFFFFFFF) -#define ELF64_R_INFO(s,t) (((s) << 32) + (u_int32_t)(t)) - -/* Program Header */ -typedef struct { - Elf32_Word p_type; /* segment type */ - Elf32_Off p_offset; /* segment offset */ - Elf32_Addr p_vaddr; /* virtual address of segment */ - Elf32_Addr p_paddr; /* physical address - ignored? */ - Elf32_Word p_filesz; /* number of bytes in file for seg. */ - Elf32_Word p_memsz; /* number of bytes in mem. for seg. 
*/ - Elf32_Word p_flags; /* flags */ - Elf32_Word p_align; /* memory alignment */ -} Elf32_Phdr; - -typedef struct { - Elf64_Half p_type; /* entry type */ - Elf64_Half p_flags; /* flags */ - Elf64_Off p_offset; /* offset */ - Elf64_Addr p_vaddr; /* virtual address */ - Elf64_Addr p_paddr; /* physical address */ - Elf64_Xword p_filesz; /* file size */ - Elf64_Xword p_memsz; /* memory size */ - Elf64_Xword p_align; /* memory & file alignment */ -} Elf64_Phdr; - -/* Segment types - p_type */ -#define PT_NULL 0 /* unused */ -#define PT_LOAD 1 /* loadable segment */ -#define PT_DYNAMIC 2 /* dynamic linking section */ -#define PT_INTERP 3 /* the RTLD */ -#define PT_NOTE 4 /* auxiliary information */ -#define PT_SHLIB 5 /* reserved - purpose undefined */ -#define PT_PHDR 6 /* program header */ -#define PT_NUM 7 /* Number of segment types */ -#define PT_LOPROC 0x70000000 /* reserved range for processor */ -#define PT_HIPROC 0x7fffffff /* specific segment types */ - -/* Segment flags - p_flags */ -#define PF_X 0x1 /* Executable */ -#define PF_W 0x2 /* Writable */ -#define PF_R 0x4 /* Readable */ -#define PF_MASKPROC 0xf0000000 /* reserved bits for processor */ - /* specific segment flags */ - -/* Dynamic structure */ -typedef struct { - Elf32_Sword d_tag; /* controls meaning of d_val */ - union { - Elf32_Word d_val; /* Multiple meanings - see d_tag */ - Elf32_Addr d_ptr; /* program virtual address */ - } d_un; -} Elf32_Dyn; - -typedef struct { - Elf64_Xword d_tag; /* controls meaning of d_val */ - union { - Elf64_Addr d_ptr; - Elf64_Xword d_val; - } d_un; -} Elf64_Dyn; - -/* Dynamic Array Tags - d_tag */ -#define DT_NULL 0 /* marks end of _DYNAMIC array */ -#define DT_NEEDED 1 /* string table offset of needed lib */ -#define DT_PLTRELSZ 2 /* size of relocation entries in PLT */ -#define DT_PLTGOT 3 /* address PLT/GOT */ -#define DT_HASH 4 /* address of symbol hash table */ -#define DT_STRTAB 5 /* address of string table */ -#define DT_SYMTAB 6 /* address of symbol table */ 
-#define DT_RELA 7 /* address of relocation table */ -#define DT_RELASZ 8 /* size of relocation table */ -#define DT_RELAENT 9 /* size of relocation entry */ -#define DT_STRSZ 10 /* size of string table */ -#define DT_SYMENT 11 /* size of symbol table entry */ -#define DT_INIT 12 /* address of initialization func. */ -#define DT_FINI 13 /* address of termination function */ -#define DT_SONAME 14 /* string table offset of shared obj */ -#define DT_RPATH 15 /* string table offset of library - search path */ -#define DT_SYMBOLIC 16 /* start sym search in shared obj. */ -#define DT_REL 17 /* address of rel. tbl. w addends */ -#define DT_RELSZ 18 /* size of DT_REL relocation table */ -#define DT_RELENT 19 /* size of DT_REL relocation entry */ -#define DT_PLTREL 20 /* PLT referenced relocation entry */ -#define DT_DEBUG 21 /* bugger */ -#define DT_TEXTREL 22 /* Allow rel. mod. to unwritable seg */ -#define DT_JMPREL 23 /* add. of PLT's relocation entries */ -#define DT_BIND_NOW 24 /* Bind now regardless of env setting */ -#define DT_NUM 25 /* Number used. 
*/ -#define DT_LOPROC 0x70000000 /* reserved range for processor */ -#define DT_HIPROC 0x7fffffff /* specific dynamic array tags */ - -/* Standard ELF hashing function */ -unsigned int elf_hash(const unsigned char *name); - -/* - * Note Definitions - */ -typedef struct { - Elf32_Word namesz; - Elf32_Word descsz; - Elf32_Word type; -} Elf32_Note; - -typedef struct { - Elf64_Half namesz; - Elf64_Half descsz; - Elf64_Half type; -} Elf64_Note; - - -#if defined(ELFSIZE) -#define CONCAT(x,y) __CONCAT(x,y) -#define ELFNAME(x) CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x))) -#define ELFNAME2(x,y) CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y)))) -#define ELFNAMEEND(x) CONCAT(x,CONCAT(_elf,ELFSIZE)) -#define ELFDEFNNAME(x) CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x))) -#endif - -#if defined(ELFSIZE) && (ELFSIZE == 32) -#define Elf_Ehdr Elf32_Ehdr -#define Elf_Phdr Elf32_Phdr -#define Elf_Shdr Elf32_Shdr -#define Elf_Sym Elf32_Sym -#define Elf_Rel Elf32_Rel -#define Elf_RelA Elf32_Rela -#define Elf_Dyn Elf32_Dyn -#define Elf_Word Elf32_Word -#define Elf_Sword Elf32_Sword -#define Elf_Addr Elf32_Addr -#define Elf_Off Elf32_Off -#define Elf_Nhdr Elf32_Nhdr -#define Elf_Note Elf32_Note - -#define ELF_R_SYM ELF32_R_SYM -#define ELF_R_TYPE ELF32_R_TYPE -#define ELF_R_INFO ELF32_R_INFO -#define ELFCLASS ELFCLASS32 - -#define ELF_ST_BIND ELF32_ST_BIND -#define ELF_ST_TYPE ELF32_ST_TYPE -#define ELF_ST_INFO ELF32_ST_INFO - -#define AuxInfo Aux32Info -#elif defined(ELFSIZE) && (ELFSIZE == 64) -#define Elf_Ehdr Elf64_Ehdr -#define Elf_Phdr Elf64_Phdr -#define Elf_Shdr Elf64_Shdr -#define Elf_Sym Elf64_Sym -#define Elf_Rel Elf64_Rel -#define Elf_RelA Elf64_Rela -#define Elf_Dyn Elf64_Dyn -#define Elf_Word Elf64_Word -#define Elf_Sword Elf64_Sword -#define Elf_Addr Elf64_Addr -#define Elf_Off Elf64_Off -#define Elf_Nhdr Elf64_Nhdr -#define Elf_Note Elf64_Note - -#define ELF_R_SYM ELF64_R_SYM -#define ELF_R_TYPE ELF64_R_TYPE -#define ELF_R_INFO ELF64_R_INFO -#define ELFCLASS ELFCLASS64 - -#define 
ELF_ST_BIND ELF64_ST_BIND -#define ELF_ST_TYPE ELF64_ST_TYPE -#define ELF_ST_INFO ELF64_ST_INFO - -#define AuxInfo Aux64Info -#endif - -#endif /* __XEN_PUBLIC_ELFSTRUCTS_H__ */ Property changes on: projects/clang370-import/sys/xen/interface/elfstructs.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/foreign/structs.py =================================================================== --- projects/clang370-import/sys/xen/interface/foreign/structs.py (revision 288925) +++ projects/clang370-import/sys/xen/interface/foreign/structs.py (nonexistent) @@ -1,58 +0,0 @@ -# configuration: what needs translation - -unions = [ "vcpu_cr_regs", - "vcpu_ar_regs" ]; - -structs = [ "start_info", - "trap_info", - "pt_fpreg", - "cpu_user_regs", - "xen_ia64_boot_param", - "ia64_tr_entry", - "vcpu_tr_regs", - "vcpu_guest_context_regs", - "vcpu_guest_context", - "arch_vcpu_info", - "vcpu_time_info", - "vcpu_info", - "arch_shared_info", - "shared_info" ]; - -defines = [ "__i386__", - "__x86_64__", - - "FLAT_RING1_CS", - "FLAT_RING1_DS", - "FLAT_RING1_SS", - - "FLAT_RING3_CS64", - "FLAT_RING3_DS64", - "FLAT_RING3_SS64", - "FLAT_KERNEL_CS64", - "FLAT_KERNEL_DS64", - "FLAT_KERNEL_SS64", - - "FLAT_KERNEL_CS", - "FLAT_KERNEL_DS", - "FLAT_KERNEL_SS", - - # x86_{32,64} - "_VGCF_i387_valid", - "VGCF_i387_valid", - "_VGCF_in_kernel", - "VGCF_in_kernel", - "_VGCF_failsafe_disables_events", - "VGCF_failsafe_disables_events", - "_VGCF_syscall_disables_events", - "VGCF_syscall_disables_events", - "_VGCF_online", - "VGCF_online", - - # ia64 - "VGCF_EXTRA_REGS", - - # all archs - "xen_pfn_to_cr3", - "XEN_LEGACY_MAX_VCPUS", - "MAX_GUEST_CMDLINE" ]; - Property changes on: projects/clang370-import/sys/xen/interface/foreign/structs.py ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end 
of property Index: projects/clang370-import/sys/xen/interface/foreign/mkchecker.py =================================================================== --- projects/clang370-import/sys/xen/interface/foreign/mkchecker.py (revision 288925) +++ projects/clang370-import/sys/xen/interface/foreign/mkchecker.py (nonexistent) @@ -1,58 +0,0 @@ -#!/usr/bin/python - -import sys; -from structs import structs; - -# command line arguments -arch = sys.argv[1]; -outfile = sys.argv[2]; -archs = sys.argv[3:]; - -f = open(outfile, "w"); -f.write(''' -/* - * sanity checks for generated foreign headers: - * - verify struct sizes - * - * generated by %s -- DO NOT EDIT - */ -#include -#include -#include -#include -#include "../xen.h" -'''); - -for a in archs: - f.write('#include "%s.h"\n' % a); - -f.write('int main(int argc, char *argv[])\n{\n'); - -f.write('\tprintf("\\n");'); -f.write('printf("%-25s |", "structs");\n'); -for a in archs: - f.write('\tprintf("%%8s", "%s");\n' % a); -f.write('\tprintf("\\n");'); - -f.write('\tprintf("\\n");'); -for struct in structs: - f.write('\tprintf("%%-25s |", "%s");\n' % struct); - for a in archs: - if a == arch: - s = struct; # native - else: - s = struct + "_" + a; - f.write('#ifdef %s_has_no_%s\n' % (a, struct)); - f.write('\tprintf("%8s", "-");\n'); - f.write("#else\n"); - f.write('\tprintf("%%8zd", sizeof(struct %s));\n' % s); - f.write("#endif\n"); - - f.write('\tprintf("\\n");\n\n'); - -f.write('\tprintf("\\n");\n'); -f.write('\texit(0);\n'); -f.write('}\n'); - -f.close(); - Property changes on: projects/clang370-import/sys/xen/interface/foreign/mkchecker.py ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/foreign/mkheader.py =================================================================== --- projects/clang370-import/sys/xen/interface/foreign/mkheader.py (revision 288925) +++ 
projects/clang370-import/sys/xen/interface/foreign/mkheader.py (nonexistent) @@ -1,167 +0,0 @@ -#!/usr/bin/python - -import sys, re; -from structs import unions, structs, defines; - -# command line arguments -arch = sys.argv[1]; -outfile = sys.argv[2]; -infiles = sys.argv[3:]; - - -########################################################################### -# configuration #2: architecture information - -inttypes = {}; -header = {}; -footer = {}; - -# x86_32 -inttypes["x86_32"] = { - "unsigned long" : "uint32_t", - "long" : "uint32_t", - "xen_pfn_t" : "uint32_t", -}; -header["x86_32"] = """ -#define __i386___X86_32 1 -#pragma pack(4) -"""; -footer["x86_32"] = """ -#pragma pack() -"""; - -# x86_64 -inttypes["x86_64"] = { - "unsigned long" : "__align8__ uint64_t", - "long" : "__align8__ uint64_t", - "xen_pfn_t" : "__align8__ uint64_t", -}; -header["x86_64"] = """ -#ifdef __GNUC__ -# define __DECL_REG(name) union { uint64_t r ## name, e ## name; } -# define __align8__ __attribute__((aligned (8))) -#else -# define __DECL_REG(name) uint64_t r ## name -# define __align8__ FIXME -#endif -#define __x86_64___X86_64 1 -"""; - -# ia64 -inttypes["ia64"] = { - "unsigned long" : "__align8__ uint64_t", - "long" : "__align8__ uint64_t", - "xen_pfn_t" : "__align8__ uint64_t", - "long double" : "__align16__ ldouble_t", -}; -header["ia64"] = """ -#define __align8__ __attribute__((aligned (8))) -#define __align16__ __attribute__((aligned (16))) -typedef unsigned char ldouble_t[16]; -"""; - - -########################################################################### -# main - -input = ""; -output = ""; -fileid = re.sub("[-.]", "_", "__FOREIGN_%s__" % outfile.upper()); - -# read input header files -for name in infiles: - f = open(name, "r"); - input += f.read(); - f.close(); - -# add header -output += """ -/* - * public xen defines and struct for %s - * generated by %s -- DO NOT EDIT - */ - -#ifndef %s -#define %s 1 - -""" % (arch, sys.argv[0], fileid, fileid) - -if arch in header: - 
output += header[arch]; - output += "\n"; - -# add defines to output -for line in re.findall("#define[^\n]+", input): - for define in defines: - regex = "#define\s+%s\\b" % define; - match = re.search(regex, line); - if None == match: - continue; - if define.upper()[0] == define[0]: - replace = define + "_" + arch.upper(); - else: - replace = define + "_" + arch; - regex = "\\b%s\\b" % define; - output += re.sub(regex, replace, line) + "\n"; -output += "\n"; - -# delete defines, comments, empty lines -input = re.sub("#define[^\n]+\n", "", input); -input = re.compile("/\*(.*?)\*/", re.S).sub("", input) -input = re.compile("\n\s*\n", re.S).sub("\n", input); - -# add unions to output -for union in unions: - regex = "union\s+%s\s*\{(.*?)\n\};" % union; - match = re.search(regex, input, re.S) - if None == match: - output += "#define %s_has_no_%s 1\n" % (arch, union); - else: - output += "union %s_%s {%s\n};\n" % (union, arch, match.group(1)); - output += "\n"; - -# add structs to output -for struct in structs: - regex = "struct\s+%s\s*\{(.*?)\n\};" % struct; - match = re.search(regex, input, re.S) - if None == match: - output += "#define %s_has_no_%s 1\n" % (arch, struct); - else: - output += "struct %s_%s {%s\n};\n" % (struct, arch, match.group(1)); - output += "typedef struct %s_%s %s_%s_t;\n" % (struct, arch, struct, arch); - output += "\n"; - -# add footer -if arch in footer: - output += footer[arch]; - output += "\n"; -output += "#endif /* %s */\n" % fileid; - -# replace: defines -for define in defines: - if define.upper()[0] == define[0]: - replace = define + "_" + arch.upper(); - else: - replace = define + "_" + arch; - output = re.sub("\\b%s\\b" % define, replace, output); - -# replace: unions -for union in unions: - output = re.sub("\\b(union\s+%s)\\b" % union, "\\1_%s" % arch, output); - -# replace: structs + struct typedefs -for struct in structs: - output = re.sub("\\b(struct\s+%s)\\b" % struct, "\\1_%s" % arch, output); - output = re.sub("\\b(%s)_t\\b" % 
struct, "\\1_%s_t" % arch, output); - -# replace: integer types -integers = list(inttypes[arch].keys()); -integers.sort(lambda a, b: cmp(len(b),len(a))); -for type in integers: - output = re.sub("\\b%s\\b" % type, inttypes[arch][type], output); - -# print results -f = open(outfile, "w"); -f.write(output); -f.close; - Property changes on: projects/clang370-import/sys/xen/interface/foreign/mkheader.py ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/foreign/reference.size =================================================================== --- projects/clang370-import/sys/xen/interface/foreign/reference.size (revision 288925) +++ projects/clang370-import/sys/xen/interface/foreign/reference.size (nonexistent) @@ -1,17 +0,0 @@ - -structs | x86_32 x86_64 ia64 - -start_info | 1104 1152 1152 -trap_info | 8 16 - -pt_fpreg | - - 16 -cpu_user_regs | 68 200 496 -xen_ia64_boot_param | - - 96 -ia64_tr_entry | - - 32 -vcpu_extra_regs | - - 536 -vcpu_guest_context | 2800 5168 1056 -arch_vcpu_info | 24 16 0 -vcpu_time_info | 32 32 32 -vcpu_info | 64 64 48 -arch_shared_info | 268 280 272 -shared_info | 2584 3368 4384 - Property changes on: projects/clang370-import/sys/xen/interface/foreign/reference.size ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/foreign/Makefile =================================================================== --- projects/clang370-import/sys/xen/interface/foreign/Makefile (revision 288925) +++ projects/clang370-import/sys/xen/interface/foreign/Makefile (nonexistent) @@ -1,37 +0,0 @@ -XEN_ROOT=../../../.. 
-include $(XEN_ROOT)/Config.mk - -architectures := x86_32 x86_64 ia64 -headers := $(patsubst %, %.h, $(architectures)) -scripts := $(wildcard *.py) - -.PHONY: all clean check-headers -all: $(headers) check-headers - -clean: - rm -f $(headers) - rm -f checker checker.c $(XEN_TARGET_ARCH).size - rm -f *.pyc *.o *~ - -ifeq ($(CROSS_COMPILE)$(XEN_TARGET_ARCH),$(XEN_COMPILE_ARCH)) -check-headers: checker - ./checker > $(XEN_TARGET_ARCH).size - diff -u reference.size $(XEN_TARGET_ARCH).size -checker: checker.c $(headers) - $(HOSTCC) $(HOSTCFLAGS) -o $@ $< -else -check-headers: - @echo "cross build: skipping check" -endif - -x86_32.h: ../arch-x86/xen-x86_32.h ../arch-x86/xen.h ../xen.h $(scripts) - python mkheader.py $* $@ $(filter %.h,$^) - -x86_64.h: ../arch-x86/xen-x86_64.h ../arch-x86/xen.h ../xen.h $(scripts) - python mkheader.py $* $@ $(filter %.h,$^) - -ia64.h: ../arch-ia64.h ../xen.h $(scripts) - python mkheader.py $* $@ $(filter %.h,$^) - -checker.c: $(scripts) - python mkchecker.py $(XEN_TARGET_ARCH) $@ $(architectures) Property changes on: projects/clang370-import/sys/xen/interface/foreign/Makefile ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/foreign =================================================================== --- projects/clang370-import/sys/xen/interface/foreign (revision 288925) +++ projects/clang370-import/sys/xen/interface/foreign (nonexistent) Property changes on: projects/clang370-import/sys/xen/interface/foreign ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/acm.h =================================================================== --- projects/clang370-import/sys/xen/interface/acm.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/acm.h 
(nonexistent) @@ -1,228 +0,0 @@ -/* - * acm.h: Xen access control module interface defintions - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Reiner Sailer - * Copyright (c) 2005, International Business Machines Corporation. - */ - -#ifndef _XEN_PUBLIC_ACM_H -#define _XEN_PUBLIC_ACM_H - -#include "xen.h" - -/* if ACM_DEBUG defined, all hooks should - * print a short trace message (comment it out - * when not in testing mode ) - */ -/* #define ACM_DEBUG */ - -#ifdef ACM_DEBUG -# define printkd(fmt, args...) printk(fmt,## args) -#else -# define printkd(fmt, args...) 
-#endif - -/* default ssid reference value if not supplied */ -#define ACM_DEFAULT_SSID 0x0 -#define ACM_DEFAULT_LOCAL_SSID 0x0 - -/* Internal ACM ERROR types */ -#define ACM_OK 0 -#define ACM_UNDEF -1 -#define ACM_INIT_SSID_ERROR -2 -#define ACM_INIT_SOID_ERROR -3 -#define ACM_ERROR -4 - -/* External ACCESS DECISIONS */ -#define ACM_ACCESS_PERMITTED 0 -#define ACM_ACCESS_DENIED -111 -#define ACM_NULL_POINTER_ERROR -200 - -/* - Error codes reported in when trying to test for a new policy - These error codes are reported in an array of tuples where - each error code is followed by a parameter describing the error - more closely, such as a domain id. -*/ -#define ACM_EVTCHN_SHARING_VIOLATION 0x100 -#define ACM_GNTTAB_SHARING_VIOLATION 0x101 -#define ACM_DOMAIN_LOOKUP 0x102 -#define ACM_CHWALL_CONFLICT 0x103 -#define ACM_SSIDREF_IN_USE 0x104 - - -/* primary policy in lower 4 bits */ -#define ACM_NULL_POLICY 0 -#define ACM_CHINESE_WALL_POLICY 1 -#define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2 -#define ACM_POLICY_UNDEFINED 15 - -/* combinations have secondary policy component in higher 4bit */ -#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \ - ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY) - -/* policy: */ -#define ACM_POLICY_NAME(X) \ - ((X) == (ACM_NULL_POLICY)) ? "NULL" : \ - ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL" : \ - ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT" : \ - ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? 
"CHINESE WALL AND SIMPLE TYPE ENFORCEMENT" : \ - "UNDEFINED" - -/* the following policy versions must be increased - * whenever the interpretation of the related - * policy's data structure changes - */ -#define ACM_POLICY_VERSION 3 -#define ACM_CHWALL_VERSION 1 -#define ACM_STE_VERSION 1 - -/* defines a ssid reference used by xen */ -typedef uint32_t ssidref_t; - -/* hooks that are known to domains */ -#define ACMHOOK_none 0 -#define ACMHOOK_sharing 1 - -/* -------security policy relevant type definitions-------- */ - -/* type identifier; compares to "equal" or "not equal" */ -typedef uint16_t domaintype_t; - -/* CHINESE WALL POLICY DATA STRUCTURES - * - * current accumulated conflict type set: - * When a domain is started and has a type that is in - * a conflict set, the conflicting types are incremented in - * the aggregate set. When a domain is destroyed, the - * conflicting types to its type are decremented. - * If a domain has multiple types, this procedure works over - * all those types. - * - * conflict_aggregate_set[i] holds the number of - * running domains that have a conflict with type i. - * - * running_types[i] holds the number of running domains - * that include type i in their ssidref-referenced type set - * - * conflict_sets[i][j] is "0" if type j has no conflict - * with type i and is "1" otherwise. - */ -/* high-16 = version, low-16 = check magic */ -#define ACM_MAGIC 0x0001debc - -/* each offset in bytes from start of the struct they - * are part of */ - -/* V3 of the policy buffer aded a version structure */ -struct acm_policy_version -{ - uint32_t major; - uint32_t minor; -}; - - -/* each buffer consists of all policy information for - * the respective policy given in the policy code - * - * acm_policy_buffer, acm_chwall_policy_buffer, - * and acm_ste_policy_buffer need to stay 32-bit aligned - * because we create binary policies also with external - * tools that assume packed representations (e.g. 
the java tool) - */ -struct acm_policy_buffer { - uint32_t policy_version; /* ACM_POLICY_VERSION */ - uint32_t magic; - uint32_t len; - uint32_t policy_reference_offset; - uint32_t primary_policy_code; - uint32_t primary_buffer_offset; - uint32_t secondary_policy_code; - uint32_t secondary_buffer_offset; - struct acm_policy_version xml_pol_version; /* add in V3 */ -}; - - -struct acm_policy_reference_buffer { - uint32_t len; -}; - -struct acm_chwall_policy_buffer { - uint32_t policy_version; /* ACM_CHWALL_VERSION */ - uint32_t policy_code; - uint32_t chwall_max_types; - uint32_t chwall_max_ssidrefs; - uint32_t chwall_max_conflictsets; - uint32_t chwall_ssid_offset; - uint32_t chwall_conflict_sets_offset; - uint32_t chwall_running_types_offset; - uint32_t chwall_conflict_aggregate_offset; -}; - -struct acm_ste_policy_buffer { - uint32_t policy_version; /* ACM_STE_VERSION */ - uint32_t policy_code; - uint32_t ste_max_types; - uint32_t ste_max_ssidrefs; - uint32_t ste_ssid_offset; -}; - -struct acm_stats_buffer { - uint32_t magic; - uint32_t len; - uint32_t primary_policy_code; - uint32_t primary_stats_offset; - uint32_t secondary_policy_code; - uint32_t secondary_stats_offset; -}; - -struct acm_ste_stats_buffer { - uint32_t ec_eval_count; - uint32_t gt_eval_count; - uint32_t ec_denied_count; - uint32_t gt_denied_count; - uint32_t ec_cachehit_count; - uint32_t gt_cachehit_count; -}; - -struct acm_ssid_buffer { - uint32_t len; - ssidref_t ssidref; - uint32_t policy_reference_offset; - uint32_t primary_policy_code; - uint32_t primary_max_types; - uint32_t primary_types_offset; - uint32_t secondary_policy_code; - uint32_t secondary_max_types; - uint32_t secondary_types_offset; -}; - -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ Property changes on: projects/clang370-import/sys/xen/interface/acm.h ___________________________________________________________________ 
Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/libelf.h =================================================================== --- projects/clang370-import/sys/xen/interface/libelf.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/libelf.h (nonexistent) @@ -1,265 +0,0 @@ -/****************************************************************************** - * libelf.h - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef __XC_LIBELF__ -#define __XC_LIBELF__ 1 - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) -#define XEN_ELF_LITTLE_ENDIAN -#else -#error define architectural endianness -#endif - -#undef ELFSIZE -#include "elfnote.h" -#include "elfstructs.h" -#include "features.h" - -/* ------------------------------------------------------------------------ */ - -typedef union { - Elf32_Ehdr e32; - Elf64_Ehdr e64; -} elf_ehdr; - -typedef union { - Elf32_Phdr e32; - Elf64_Phdr e64; -} elf_phdr; - -typedef union { - Elf32_Shdr e32; - Elf64_Shdr e64; -} elf_shdr; - -typedef union { - Elf32_Sym e32; - Elf64_Sym e64; -} elf_sym; - -typedef union { - Elf32_Rel e32; - Elf64_Rel e64; -} elf_rel; - -typedef union { - Elf32_Rela e32; - Elf64_Rela e64; -} elf_rela; - -typedef union { - Elf32_Note e32; - Elf64_Note e64; -} elf_note; - -struct elf_binary { - /* elf binary */ - const char *image; - size_t size; - char class; - char data; - - const elf_ehdr *ehdr; - const char *sec_strtab; - const elf_shdr *sym_tab; - const char *sym_strtab; - - /* loaded to */ - char *dest; - uint64_t pstart; - uint64_t pend; - uint64_t reloc_offset; - - uint64_t bsd_symtab_pstart; - uint64_t bsd_symtab_pend; - -#ifndef __XEN__ - /* misc */ - FILE *log; -#endif - int verbose; -}; - -/* ------------------------------------------------------------------------ */ -/* accessing elf header fields */ - -#ifdef XEN_ELF_BIG_ENDIAN -# define NATIVE_ELFDATA ELFDATA2MSB -#else -# define NATIVE_ELFDATA ELFDATA2LSB -#endif - -#define elf_32bit(elf) (ELFCLASS32 == (elf)->class) -#define elf_64bit(elf) (ELFCLASS64 == (elf)->class) -#define elf_msb(elf) (ELFDATA2MSB == (elf)->data) -#define elf_lsb(elf) (ELFDATA2LSB == (elf)->data) -#define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data) - -#define elf_uval(elf, str, elem) \ - ((ELFCLASS64 == (elf)->class) \ - ? 
elf_access_unsigned((elf), (str), \ - offsetof(typeof(*(str)),e64.elem), \ - sizeof((str)->e64.elem)) \ - : elf_access_unsigned((elf), (str), \ - offsetof(typeof(*(str)),e32.elem), \ - sizeof((str)->e32.elem))) - -#define elf_sval(elf, str, elem) \ - ((ELFCLASS64 == (elf)->class) \ - ? elf_access_signed((elf), (str), \ - offsetof(typeof(*(str)),e64.elem), \ - sizeof((str)->e64.elem)) \ - : elf_access_signed((elf), (str), \ - offsetof(typeof(*(str)),e32.elem), \ - sizeof((str)->e32.elem))) - -#define elf_size(elf, str) \ - ((ELFCLASS64 == (elf)->class) \ - ? sizeof((str)->e64) : sizeof((str)->e32)) - -uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr, - uint64_t offset, size_t size); -int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, - uint64_t offset, size_t size); - -uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr); - -/* ------------------------------------------------------------------------ */ -/* xc_libelf_tools.c */ - -int elf_shdr_count(struct elf_binary *elf); -int elf_phdr_count(struct elf_binary *elf); - -const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name); -const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index); -const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index); - -const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr); -const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr); -const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr); - -const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr); -const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr); - -const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol); -const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index); - -const char *elf_note_name(struct elf_binary *elf, const elf_note * note); -const void *elf_note_desc(struct elf_binary *elf, const elf_note * note); 
-uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note); -const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note); - -int elf_is_elfbinary(const void *image); -int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr); - -/* ------------------------------------------------------------------------ */ -/* xc_libelf_loader.c */ - -int elf_init(struct elf_binary *elf, const char *image, size_t size); -#ifdef __XEN__ -void elf_set_verbose(struct elf_binary *elf); -#else -void elf_set_logfile(struct elf_binary *elf, FILE * log, int verbose); -#endif - -void elf_parse_binary(struct elf_binary *elf); -void elf_load_binary(struct elf_binary *elf); - -void *elf_get_ptr(struct elf_binary *elf, unsigned long addr); -uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol); - -void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */ - -/* ------------------------------------------------------------------------ */ -/* xc_libelf_relocate.c */ - -int elf_reloc(struct elf_binary *elf); - -/* ------------------------------------------------------------------------ */ -/* xc_libelf_dominfo.c */ - -#define UNSET_ADDR ((uint64_t)-1) - -enum xen_elfnote_type { - XEN_ENT_NONE = 0, - XEN_ENT_LONG = 1, - XEN_ENT_STR = 2 -}; - -struct xen_elfnote { - enum xen_elfnote_type type; - const char *name; - union { - const char *str; - uint64_t num; - } data; -}; - -struct elf_dom_parms { - /* raw */ - const char *guest_info; - const void *elf_note_start; - const void *elf_note_end; - struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1]; - - /* parsed */ - char guest_os[16]; - char guest_ver[16]; - char xen_ver[16]; - char loader[16]; - int pae; - int bsd_symtab; - uint64_t virt_base; - uint64_t virt_entry; - uint64_t virt_hypercall; - uint64_t virt_hv_start_low; - uint64_t elf_paddr_offset; - uint32_t f_supported[XENFEAT_NR_SUBMAPS]; - uint32_t f_required[XENFEAT_NR_SUBMAPS]; - - /* calculated */ - uint64_t 
virt_offset; - uint64_t virt_kstart; - uint64_t virt_kend; -}; - -static inline void elf_xen_feature_set(int nr, uint32_t * addr) -{ - addr[nr >> 5] |= 1 << (nr & 31); -} -static inline int elf_xen_feature_get(int nr, uint32_t * addr) -{ - return !!(addr[nr >> 5] & (1 << (nr & 31))); -} - -int elf_xen_parse_features(const char *features, - uint32_t *supported, - uint32_t *required); -int elf_xen_parse_note(struct elf_binary *elf, - struct elf_dom_parms *parms, - const elf_note *note); -int elf_xen_parse_guest_info(struct elf_binary *elf, - struct elf_dom_parms *parms); -int elf_xen_parse(struct elf_binary *elf, - struct elf_dom_parms *parms); - -#endif /* __XC_LIBELF__ */ Property changes on: projects/clang370-import/sys/xen/interface/libelf.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/arch-powerpc.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-powerpc.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-powerpc.h (nonexistent) @@ -1,120 +0,0 @@ -/* - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (C) IBM Corp. 2005, 2006 - * - * Authors: Hollis Blanchard - */ - -#include "xen.h" - -#ifndef __XEN_PUBLIC_ARCH_PPC_64_H__ -#define __XEN_PUBLIC_ARCH_PPC_64_H__ - -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { \ - int __pad[(sizeof (long long) - sizeof (void *)) / sizeof (int)]; \ - type *p; \ - } __attribute__((__aligned__(8))) __guest_handle_ ## name - -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - ___DEFINE_XEN_GUEST_HANDLE(name, type); \ - ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) -#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) -#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define set_xen_guest_handle(hnd, val) \ - do { \ - if (sizeof ((hnd).__pad)) \ - (hnd).__pad[0] = 0; \ - (hnd).p = val; \ - } while (0) - -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif - -#ifndef __ASSEMBLY__ -typedef unsigned long long xen_pfn_t; -#define PRI_xen_pfn "llx" -#endif - -/* - * Pointers and other address fields inside interface structures are padded to - * 64 bits. This means that field alignments aren't different between 32- and - * 64-bit architectures. - */ -/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */ -#define __MEMORY_PADDING(_X) -#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X) -#define MEMORY_PADDING _MEMORY_PADDING(__LINE__) - -/* And the trap vector is... 
*/ -#define TRAP_INSTR "li 0,-1; sc" /* XXX just "sc"? */ - -#ifndef __ASSEMBLY__ - -#define XENCOMM_INLINE_FLAG (1UL << 63) - -typedef uint64_t xen_ulong_t; - -/* User-accessible registers: nost of these need to be saved/restored - * for every nested Xen invocation. */ -struct cpu_user_regs -{ - uint64_t gprs[32]; - uint64_t lr; - uint64_t ctr; - uint64_t srr0; - uint64_t srr1; - uint64_t pc; - uint64_t msr; - uint64_t fpscr; /* XXX Is this necessary */ - uint64_t xer; - uint64_t hid4; /* debug only */ - uint64_t dar; /* debug only */ - uint32_t dsisr; /* debug only */ - uint32_t cr; - uint32_t __pad; /* good spot for another 32bit reg */ - uint32_t entry_vector; -}; -typedef struct cpu_user_regs cpu_user_regs_t; - -typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ - -/* ONLY used to communicate with dom0! See also struct exec_domain. */ -struct vcpu_guest_context { - cpu_user_regs_t user_regs; /* User-level CPU registers */ - uint64_t sdr1; /* Pagetable base */ - /* XXX etc */ -}; -typedef struct vcpu_guest_context vcpu_guest_context_t; -DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); - -struct arch_shared_info { - uint64_t boot_timebase; -}; - -struct arch_vcpu_info { -}; - -/* Support for multi-processor guests. */ -#define MAX_VIRT_CPUS 32 -#endif - -#endif Property changes on: projects/clang370-import/sys/xen/interface/arch-powerpc.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -1 \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/arch-arm/hvm/save.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-arm/hvm/save.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-arm/hvm/save.h (revision 288926) @@ -1,39 +1,39 @@ /* * Structure definitions for HVM state that is held by Xen and must * be saved along with the domain's memory and device-model state. 
* * Copyright (c) 2012 Citrix Systems Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__ #define __XEN_PUBLIC_HVM_SAVE_ARM_H__ #endif /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/arch-arm.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-arm.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-arm.h (revision 288926) @@ -1,176 +1,466 @@ /****************************************************************************** * arch-arm.h * * Guest OS interface to ARM Xen. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright 2011 (C) Citrix Systems */ #ifndef __XEN_PUBLIC_ARCH_ARM_H__ #define __XEN_PUBLIC_ARCH_ARM_H__ -/* hypercall calling convention - * ---------------------------- +/* + * `incontents 50 arm_abi Hypercall Calling Convention * * A hypercall is issued using the ARM HVC instruction. * * A hypercall can take up to 5 arguments. These are passed in - * registers, the first argument in r0, the second argument in r1, the - * third in r2, the forth in r3 and the fifth in r4. + * registers, the first argument in x0/r0 (for arm64/arm32 guests + * respectively irrespective of whether the underlying hypervisor is + * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2, + * the forth in x3/r3 and the fifth in x4/r4. * - * The hypercall number is passed in r12. + * The hypercall number is passed in r12 (arm) or x16 (arm64). 
In both + * cases the relevant ARM procedure calling convention specifies this + * is an inter-procedure-call scratch register (e.g. for use in linker + * stubs). This use does not conflict with use during a hypercall. * * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG. * - * The return value is in r0. + * The return value is in x0/r0. * - * The hypercall will clobber r12 and the argument registers used by - * that hypercall (except r0 which is the return value) i.e. a 2 - * argument hypercall will clobber r1 and a 4 argument hypercall will - * clobber r1, r2 and r3. + * The hypercall will clobber x16/r12 and the argument registers used + * by that hypercall (except r0 which is the return value) i.e. in + * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a + * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3. * + * Parameter structs passed to hypercalls are laid out according to + * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA + * EABI) and Procedure Call Standard for the ARM 64-bit Architecture + * (AAPCS64). Where there is a conflict the 64-bit standard should be + * used regardless of guest type. Structures which are passed as + * hypercall arguments are always little endian. + * + * All memory which is shared with other entities in the system + * (including the hypervisor and other guests) must reside in memory + * which is mapped as Normal Inner-cacheable. This applies to: + * - hypercall arguments passed via a pointer to guest memory. + * - memory shared via the grant table mechanism (including PV I/O + * rings etc). + * - memory shared with the hypervisor (struct shared_info, struct + * vcpu_info, the grant table, etc). + * + * Any Inner cache allocation strategy (Write-Back, Write-Through etc) + * is acceptable. There is no restriction on the Outer-cacheability. 
*/ +/* + * `incontents 55 arm_hcall Supported Hypercalls + * + * Xen on ARM makes extensive use of hardware facilities and therefore + * only a subset of the potential hypercalls are required. + * + * Since ARM uses second stage paging any machine/physical addresses + * passed to hypercalls are Guest Physical Addresses (Intermediate + * Physical Addresses) unless otherwise noted. + * + * The following hypercalls (and sub operations) are supported on the + * ARM platform. Other hypercalls should be considered + * unavailable/unsupported. + * + * HYPERVISOR_memory_op + * All generic sub-operations + * + * HYPERVISOR_domctl + * All generic sub-operations, with the exception of: + * * XEN_DOMCTL_irq_permission (not yet implemented) + * + * HYPERVISOR_sched_op + * All generic sub-operations, with the exception of: + * * SCHEDOP_block -- prefer wfi hardware instruction + * + * HYPERVISOR_console_io + * All generic sub-operations + * + * HYPERVISOR_xen_version + * All generic sub-operations + * + * HYPERVISOR_event_channel_op + * All generic sub-operations + * + * HYPERVISOR_physdev_op + * No sub-operations are currenty supported + * + * HYPERVISOR_sysctl + * All generic sub-operations, with the exception of: + * * XEN_SYSCTL_page_offline_op + * * XEN_SYSCTL_get_pmstat + * * XEN_SYSCTL_pm_op + * + * HYPERVISOR_hvm_op + * Exactly these sub-operations are supported: + * * HVMOP_set_param + * * HVMOP_get_param + * + * HYPERVISOR_grant_table_op + * All generic sub-operations + * + * HYPERVISOR_vcpu_op + * Exactly these sub-operations are supported: + * * VCPUOP_register_vcpu_info + * * VCPUOP_register_runstate_memory_area + * + * + * Other notes on the ARM ABI: + * + * - struct start_info is not exported to ARM guests. + * + * - struct shared_info is mapped by ARM guests using the + * HYPERVISOR_memory_op sub-op XENMEM_add_to_physmap, passing + * XENMAPSPACE_shared_info as space parameter. 
+ * + * - All the per-cpu struct vcpu_info are mapped by ARM guests using the + * HYPERVISOR_vcpu_op sub-op VCPUOP_register_vcpu_info, including cpu0 + * struct vcpu_info. + * + * - The grant table is mapped using the HYPERVISOR_memory_op sub-op + * XENMEM_add_to_physmap, passing XENMAPSPACE_grant_table as space + * parameter. The memory range specified under the Xen compatible + * hypervisor node on device tree can be used as target gpfn for the + * mapping. + * + * - Xenstore is initialized by using the two hvm_params + * HVM_PARAM_STORE_PFN and HVM_PARAM_STORE_EVTCHN. They can be read + * with the HYPERVISOR_hvm_op sub-op HVMOP_get_param. + * + * - The paravirtualized console is initialized by using the two + * hvm_params HVM_PARAM_CONSOLE_PFN and HVM_PARAM_CONSOLE_EVTCHN. They + * can be read with the HYPERVISOR_hvm_op sub-op HVMOP_get_param. + * + * - Event channel notifications are delivered using the percpu GIC + * interrupt specified under the Xen compatible hypervisor node on + * device tree. + * + * - The device tree Xen compatible node is fully described under Linux + * at Documentation/devicetree/bindings/arm/xen.txt. + */ + #define XEN_HYPERCALL_TAG 0XEA1 +#define int64_aligned_t int64_t __attribute__((aligned(8))) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) #ifndef __ASSEMBLY__ -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } __guest_handle_ ## name +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef union { type *p; unsigned long q; } \ + __guest_handle_ ## name; \ + typedef union { type *p; uint64_aligned_t q; } \ + __guest_handle_64_ ## name; +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. On ARM is always 8 bytes sizes and 8 bytes + * aligned. + * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as an + * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64. 
+ */ #define __DEFINE_XEN_GUEST_HANDLE(name, type) \ ___DEFINE_XEN_GUEST_HANDLE(name, type); \ ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) -#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define __XEN_GUEST_HANDLE(name) __guest_handle_64_ ## name #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) -#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) +#define XEN_GUEST_HANDLE_PARAM(name) __guest_handle_ ## name +#define set_xen_guest_handle_raw(hnd, val) \ + do { \ + typeof(&(hnd)) _sxghr_tmp = &(hnd); \ + _sxghr_tmp->q = 0; \ + _sxghr_tmp->p = val; \ + } while ( 0 ) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) -struct cpu_user_regs +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */ +# define __DECL_REG(n64, n32) union { \ + uint64_t n64; \ + uint32_t n32; \ + } +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */ +#define __DECL_REG(n64, n32) uint64_t n64 +#endif + +struct vcpu_guest_core_regs { - uint32_t r0; - uint32_t r1; - uint32_t r2; - uint32_t r3; - uint32_t r4; - uint32_t r5; - uint32_t r6; - uint32_t r7; - uint32_t r8; - uint32_t r9; - uint32_t r10; - union { - uint32_t r11; - uint32_t fp; - }; - uint32_t r12; + /* Aarch64 Aarch32 */ + __DECL_REG(x0, r0_usr); + __DECL_REG(x1, r1_usr); + __DECL_REG(x2, r2_usr); + __DECL_REG(x3, r3_usr); + __DECL_REG(x4, r4_usr); + __DECL_REG(x5, r5_usr); + __DECL_REG(x6, r6_usr); + __DECL_REG(x7, r7_usr); + __DECL_REG(x8, r8_usr); + __DECL_REG(x9, r9_usr); + __DECL_REG(x10, r10_usr); + __DECL_REG(x11, r11_usr); + __DECL_REG(x12, r12_usr); - uint32_t sp; /* r13 - SP: Valid for Hyp. 
frames only, o/w banked (see below) */ + __DECL_REG(x13, sp_usr); + __DECL_REG(x14, lr_usr); - /* r14 - LR: is the same physical register as LR_usr */ - union { - uint32_t lr; /* r14 - LR: Valid for Hyp. Same physical register as lr_usr. */ - uint32_t lr_usr; - }; + __DECL_REG(x15, __unused_sp_hyp); - uint32_t pc; /* Return IP */ - uint32_t cpsr; /* Return mode */ - uint32_t pad0; /* Doubleword-align the kernel half of the frame */ + __DECL_REG(x16, lr_irq); + __DECL_REG(x17, sp_irq); - /* Outer guest frame only from here on... */ + __DECL_REG(x18, lr_svc); + __DECL_REG(x19, sp_svc); - uint32_t r8_fiq, r9_fiq, r10_fiq, r11_fiq, r12_fiq; + __DECL_REG(x20, lr_abt); + __DECL_REG(x21, sp_abt); - uint32_t sp_usr; /* LR_usr is the same register as LR, see above */ + __DECL_REG(x22, lr_und); + __DECL_REG(x23, sp_und); - uint32_t sp_svc, sp_abt, sp_und, sp_irq, sp_fiq; - uint32_t lr_svc, lr_abt, lr_und, lr_irq, lr_fiq; + __DECL_REG(x24, r8_fiq); + __DECL_REG(x25, r9_fiq); + __DECL_REG(x26, r10_fiq); + __DECL_REG(x27, r11_fiq); + __DECL_REG(x28, r12_fiq); - uint32_t spsr_svc, spsr_abt, spsr_und, spsr_irq, spsr_fiq; + __DECL_REG(x29, sp_fiq); + __DECL_REG(x30, lr_fiq); - uint32_t pad1; /* Doubleword-align the user half of the frame */ + /* Return address and mode */ + __DECL_REG(pc64, pc32); /* ELR_EL2 */ + uint32_t cpsr; /* SPSR_EL2 */ + + union { + uint32_t spsr_el1; /* AArch64 */ + uint32_t spsr_svc; /* AArch32 */ + }; + + /* AArch32 guests only */ + uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt; + + /* AArch64 guests only */ + uint64_t sp_el0; + uint64_t sp_el1, elr_el1; }; -typedef struct cpu_user_regs cpu_user_regs_t; -DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); +typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t); +#undef __DECL_REG + typedef uint64_t xen_pfn_t; #define PRI_xen_pfn PRIx64 /* Maximum number of virtual CPUs in legacy multi-processor guests. */ /* Only one. 
All other VCPUS must use VCPUOP_register_vcpu_info */ #define XEN_LEGACY_MAX_VCPUS 1 -typedef uint32_t xen_ulong_t; +typedef uint64_t xen_ulong_t; +#define PRI_xen_ulong PRIx64 +#if defined(__XEN__) || defined(__XEN_TOOLS__) struct vcpu_guest_context { - struct cpu_user_regs user_regs; /* User-level CPU registers */ +#define _VGCF_online 0 +#define VGCF_online (1<<_VGCF_online) + uint32_t flags; /* VGCF_* */ + struct vcpu_guest_core_regs user_regs; /* Core CPU registers */ + uint32_t sctlr; - uint32_t ttbr0, ttbr1, ttbcr; + uint64_t ttbcr, ttbr0, ttbr1; }; typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -struct arch_vcpu_info { }; +/* + * struct xen_arch_domainconfig's ABI is covered by + * XEN_DOMCTL_INTERFACE_VERSION. + */ +#define XEN_DOMCTL_CONFIG_GIC_NATIVE 0 +#define XEN_DOMCTL_CONFIG_GIC_V2 1 +#define XEN_DOMCTL_CONFIG_GIC_V3 2 +struct xen_arch_domainconfig { + /* IN/OUT */ + uint8_t gic_version; + /* IN */ + uint32_t nr_spis; + /* + * OUT + * Based on the property clock-frequency in the DT timer node. + * The property may be present when the bootloader/firmware doesn't + * set correctly CNTFRQ which hold the timer frequency. + * + * As it's not possible to trap this register, we have to replicate + * the value in the guest DT. 
+ * + * = 0 => property not present + * > 0 => Value of the property + * + */ + uint32_t clock_frequency; +}; +#endif /* __XEN__ || __XEN_TOOLS__ */ + +struct arch_vcpu_info { +}; typedef struct arch_vcpu_info arch_vcpu_info_t; -struct arch_shared_info { }; +struct arch_shared_info { +}; typedef struct arch_shared_info arch_shared_info_t; typedef uint64_t xen_callback_t; -#endif /* ifndef __ASSEMBLY __ */ +#endif -/* PSR bits (CPSR, SPSR)*/ +#if defined(__XEN__) || defined(__XEN_TOOLS__) -/* 0-4: Mode */ -#define PSR_MODE_MASK 0x1f +/* PSR bits (CPSR, SPSR) */ + +#define PSR_THUMB (1<<5) /* Thumb Mode enable */ +#define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */ +#define PSR_IRQ_MASK (1<<7) /* Interrupt mask */ +#define PSR_ABT_MASK (1<<8) /* Asynchronous Abort mask */ +#define PSR_BIG_ENDIAN (1<<9) /* arm32: Big Endian Mode */ +#define PSR_DBG_MASK (1<<9) /* arm64: Debug Exception mask */ +#define PSR_IT_MASK (0x0600fc00) /* Thumb If-Then Mask */ +#define PSR_JAZELLE (1<<24) /* Jazelle Mode */ + +/* 32 bit modes */ #define PSR_MODE_USR 0x10 #define PSR_MODE_FIQ 0x11 #define PSR_MODE_IRQ 0x12 #define PSR_MODE_SVC 0x13 #define PSR_MODE_MON 0x16 #define PSR_MODE_ABT 0x17 #define PSR_MODE_HYP 0x1a #define PSR_MODE_UND 0x1b #define PSR_MODE_SYS 0x1f -#define PSR_THUMB (1<<5) /* Thumb Mode enable */ -#define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */ -#define PSR_IRQ_MASK (1<<7) /* Interrupt mask */ -#define PSR_ABT_MASK (1<<8) /* Asynchronous Abort mask */ -#define PSR_BIG_ENDIAN (1<<9) /* Big Endian Mode */ -#define PSR_JAZELLE (1<<24) /* Jazelle Mode */ +/* 64 bit modes */ +#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */ +#define PSR_MODE_EL3h 0x0d +#define PSR_MODE_EL3t 0x0c +#define PSR_MODE_EL2h 0x09 +#define PSR_MODE_EL2t 0x08 +#define PSR_MODE_EL1h 0x05 +#define PSR_MODE_EL1t 0x04 +#define PSR_MODE_EL0t 0x00 +#define PSR_GUEST32_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC) +#define PSR_GUEST64_INIT 
(PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h) + +#define SCTLR_GUEST_INIT 0x00c50078 + +/* + * Virtual machine platform (memory layout, interrupts) + * + * These are defined for consistency between the tools and the + * hypervisor. Guests must not rely on these hardcoded values but + * should instead use the FDT. + */ + +/* Physical Address Space */ + +/* + * vGIC mappings: Only one set of mapping is used by the guest. + * Therefore they can overlap. + */ + +/* vGIC v2 mappings */ +#define GUEST_GICD_BASE 0x03001000ULL +#define GUEST_GICD_SIZE 0x00001000ULL +#define GUEST_GICC_BASE 0x03002000ULL +#define GUEST_GICC_SIZE 0x00000100ULL + +/* vGIC v3 mappings */ +#define GUEST_GICV3_GICD_BASE 0x03001000ULL +#define GUEST_GICV3_GICD_SIZE 0x00010000ULL + +#define GUEST_GICV3_RDIST_STRIDE 0x20000ULL +#define GUEST_GICV3_RDIST_REGIONS 1 + +#define GUEST_GICV3_GICR0_BASE 0x03020000ULL /* vCPU0 - vCPU127 */ +#define GUEST_GICV3_GICR0_SIZE 0x01000000ULL + +/* + * 16MB == 4096 pages reserved for guest to use as a region to map its + * grant table in. + */ +#define GUEST_GNTTAB_BASE 0x38000000ULL +#define GUEST_GNTTAB_SIZE 0x01000000ULL + +#define GUEST_MAGIC_BASE 0x39000000ULL +#define GUEST_MAGIC_SIZE 0x01000000ULL + +#define GUEST_RAM_BANKS 2 + +#define GUEST_RAM0_BASE 0x40000000ULL /* 3GB of low RAM @ 1GB */ +#define GUEST_RAM0_SIZE 0xc0000000ULL + +#define GUEST_RAM1_BASE 0x0200000000ULL /* 1016GB of RAM @ 8GB */ +#define GUEST_RAM1_SIZE 0xfe00000000ULL + +#define GUEST_RAM_BASE GUEST_RAM0_BASE /* Lowest RAM address */ +/* Largest amount of actual RAM, not including holes */ +#define GUEST_RAM_MAX (GUEST_RAM0_SIZE + GUEST_RAM1_SIZE) +/* Suitable for e.g. 
const uint64_t ramfoo[] = GUEST_RAM_BANK_FOOS; */ +#define GUEST_RAM_BANK_BASES { GUEST_RAM0_BASE, GUEST_RAM1_BASE } +#define GUEST_RAM_BANK_SIZES { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE } + +/* Interrupts */ +#define GUEST_TIMER_VIRT_PPI 27 +#define GUEST_TIMER_PHYS_S_PPI 29 +#define GUEST_TIMER_PHYS_NS_PPI 30 +#define GUEST_EVTCHN_PPI 31 + +/* PSCI functions */ +#define PSCI_cpu_suspend 0 +#define PSCI_cpu_off 1 +#define PSCI_cpu_on 2 +#define PSCI_migrate 3 + +#endif + +#ifndef __ASSEMBLY__ +/* Stub definition of PMU structure */ +typedef struct xen_pmu_arch { uint8_t dummy; } xen_pmu_arch_t; +#endif + #endif /* __XEN_PUBLIC_ARCH_ARM_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/arch-x86/cpuid.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/cpuid.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-x86/cpuid.h (revision 288926) @@ -1,68 +1,90 @@ /****************************************************************************** * arch-x86/cpuid.h * * CPUID interface to Xen. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2007 Citrix Systems, Inc. * * Authors: * Keir Fraser */ #ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ #define __XEN_PUBLIC_ARCH_X86_CPUID_H__ -/* Xen identification leaves start at 0x40000000. */ +/* + * For compatibility with other hypervisor interfaces, the Xen cpuid leaves + * can be found at the first otherwise unused 0x100 aligned boundary starting + * from 0x40000000. + * + * e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid + * leaves will start at 0x40000100 + */ + #define XEN_CPUID_FIRST_LEAF 0x40000000 #define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) /* - * Leaf 1 (0x40000000) + * Leaf 1 (0x40000x00) * EAX: Largest Xen-information leaf. All leaves up to an including @EAX * are supported by the Xen host. * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification * of a Xen host. */ #define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ #define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ #define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ /* - * Leaf 2 (0x40000001) + * Leaf 2 (0x40000x01) * EAX[31:16]: Xen major version. * EAX[15: 0]: Xen minor version. * EBX-EDX: Reserved (currently all zeroes). */ /* - * Leaf 3 (0x40000002) + * Leaf 3 (0x40000x02) * EAX: Number of hypercall transfer pages. This register is always guaranteed * to specify one hypercall page. * EBX: Base address of Xen-specific MSRs. * ECX: Features 1. Unused bits are set to zero. * EDX: Features 2. Unused bits are set to zero. 
*/ /* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ #define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +/* + * Leaf 5 (0x40000x04) + * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) + */ +#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */ +#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */ +/* Memory mapped from other domains has valid IOMMU entries */ +#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ + +#define XEN_CPUID_MAX_NUM_LEAVES 4 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ Index: projects/clang370-import/sys/xen/interface/arch-x86/hvm/save.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/hvm/save.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-x86/hvm/save.h (revision 288926) @@ -1,589 +1,630 @@ /* * Structure definitions for HVM state that is held by Xen and must * be saved along with the domain's memory and device-model state. * * Copyright (c) 2007 XenSource Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ #define __XEN_PUBLIC_HVM_SAVE_X86_H__ /* * Save/restore header: general info about the save file. */ #define HVM_FILE_MAGIC 0x54381286 #define HVM_FILE_VERSION 0x00000001 struct hvm_save_header { uint32_t magic; /* Must be HVM_FILE_MAGIC */ uint32_t version; /* File format version */ uint64_t changeset; /* Version of Xen that saved this file */ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ }; DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); /* * Processor * * Compat: Pre-3.4 didn't have msr_tsc_aux */ struct hvm_hw_cpu { uint8_t fpu_regs[512]; uint64_t rax; uint64_t rbx; uint64_t rcx; uint64_t rdx; uint64_t rbp; uint64_t rsi; uint64_t rdi; uint64_t rsp; uint64_t r8; uint64_t r9; uint64_t r10; uint64_t r11; uint64_t r12; uint64_t r13; uint64_t r14; uint64_t r15; uint64_t rip; uint64_t rflags; uint64_t cr0; uint64_t cr2; uint64_t cr3; uint64_t cr4; uint64_t dr0; uint64_t dr1; uint64_t dr2; uint64_t dr3; uint64_t dr6; uint64_t dr7; uint32_t cs_sel; uint32_t ds_sel; uint32_t es_sel; uint32_t fs_sel; uint32_t gs_sel; uint32_t ss_sel; uint32_t tr_sel; uint32_t ldtr_sel; uint32_t cs_limit; uint32_t ds_limit; uint32_t es_limit; uint32_t fs_limit; uint32_t gs_limit; uint32_t ss_limit; uint32_t tr_limit; uint32_t ldtr_limit; uint32_t idtr_limit; uint32_t gdtr_limit; uint64_t cs_base; uint64_t ds_base; uint64_t es_base; uint64_t fs_base; uint64_t gs_base; uint64_t ss_base; uint64_t tr_base; 
uint64_t ldtr_base; uint64_t idtr_base; uint64_t gdtr_base; uint32_t cs_arbytes; uint32_t ds_arbytes; uint32_t es_arbytes; uint32_t fs_arbytes; uint32_t gs_arbytes; uint32_t ss_arbytes; uint32_t tr_arbytes; uint32_t ldtr_arbytes; uint64_t sysenter_cs; uint64_t sysenter_esp; uint64_t sysenter_eip; /* msr for em64t */ uint64_t shadow_gs; /* msr content saved/restored. */ uint64_t msr_flags; uint64_t msr_lstar; uint64_t msr_star; uint64_t msr_cstar; uint64_t msr_syscall_mask; uint64_t msr_efer; uint64_t msr_tsc_aux; /* guest's idea of what rdtsc() would return */ uint64_t tsc; /* pending event, if any */ union { uint32_t pending_event; struct { uint8_t pending_vector:8; uint8_t pending_type:3; uint8_t pending_error_valid:1; uint32_t pending_reserved:19; uint8_t pending_valid:1; }; }; /* error code for pending event */ uint32_t error_code; }; struct hvm_hw_cpu_compat { uint8_t fpu_regs[512]; uint64_t rax; uint64_t rbx; uint64_t rcx; uint64_t rdx; uint64_t rbp; uint64_t rsi; uint64_t rdi; uint64_t rsp; uint64_t r8; uint64_t r9; uint64_t r10; uint64_t r11; uint64_t r12; uint64_t r13; uint64_t r14; uint64_t r15; uint64_t rip; uint64_t rflags; uint64_t cr0; uint64_t cr2; uint64_t cr3; uint64_t cr4; uint64_t dr0; uint64_t dr1; uint64_t dr2; uint64_t dr3; uint64_t dr6; uint64_t dr7; uint32_t cs_sel; uint32_t ds_sel; uint32_t es_sel; uint32_t fs_sel; uint32_t gs_sel; uint32_t ss_sel; uint32_t tr_sel; uint32_t ldtr_sel; uint32_t cs_limit; uint32_t ds_limit; uint32_t es_limit; uint32_t fs_limit; uint32_t gs_limit; uint32_t ss_limit; uint32_t tr_limit; uint32_t ldtr_limit; uint32_t idtr_limit; uint32_t gdtr_limit; uint64_t cs_base; uint64_t ds_base; uint64_t es_base; uint64_t fs_base; uint64_t gs_base; uint64_t ss_base; uint64_t tr_base; uint64_t ldtr_base; uint64_t idtr_base; uint64_t gdtr_base; uint32_t cs_arbytes; uint32_t ds_arbytes; uint32_t es_arbytes; uint32_t fs_arbytes; uint32_t gs_arbytes; uint32_t ss_arbytes; uint32_t tr_arbytes; uint32_t ldtr_arbytes; uint64_t 
sysenter_cs; uint64_t sysenter_esp; uint64_t sysenter_eip; /* msr for em64t */ uint64_t shadow_gs; /* msr content saved/restored. */ uint64_t msr_flags; uint64_t msr_lstar; uint64_t msr_star; uint64_t msr_cstar; uint64_t msr_syscall_mask; uint64_t msr_efer; /*uint64_t msr_tsc_aux; COMPAT */ /* guest's idea of what rdtsc() would return */ uint64_t tsc; /* pending event, if any */ union { uint32_t pending_event; struct { uint8_t pending_vector:8; uint8_t pending_type:3; uint8_t pending_error_valid:1; uint32_t pending_reserved:19; uint8_t pending_valid:1; }; }; /* error code for pending event */ uint32_t error_code; }; static inline int _hvm_hw_fix_cpu(void *h) { - struct hvm_hw_cpu *new=h; - struct hvm_hw_cpu_compat *old=h; + union hvm_hw_cpu_union { + struct hvm_hw_cpu nat; + struct hvm_hw_cpu_compat cmp; + } *ucpu = (union hvm_hw_cpu_union *)h; + /* If we copy from the end backwards, we should * be able to do the modification in-place */ - new->error_code=old->error_code; - new->pending_event=old->pending_event; - new->tsc=old->tsc; - new->msr_tsc_aux=0; + ucpu->nat.error_code = ucpu->cmp.error_code; + ucpu->nat.pending_event = ucpu->cmp.pending_event; + ucpu->nat.tsc = ucpu->cmp.tsc; + ucpu->nat.msr_tsc_aux = 0; return 0; } DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \ struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu); /* * PIC */ struct hvm_hw_vpic { /* IR line bitmasks. */ uint8_t irr; uint8_t imr; uint8_t isr; /* Line IRx maps to IRQ irq_base+x */ uint8_t irq_base; /* * Where are we in ICW2-4 initialisation (0 means no init in progress)? * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) */ uint8_t init_state:4; /* IR line with highest priority. */ uint8_t priority_add:4; /* Reads from A=0 obtain ISR or IRR? */ uint8_t readsel_isr:1; /* Reads perform a polling read? */ uint8_t poll:1; /* Automatically clear IRQs from the ISR during INTA? 
*/ uint8_t auto_eoi:1; /* Automatically rotate IRQ priorities during AEOI? */ uint8_t rotate_on_auto_eoi:1; /* Exclude slave inputs when considering in-service IRQs? */ uint8_t special_fully_nested_mode:1; /* Special mask mode excludes masked IRs from AEOI and priority checks. */ uint8_t special_mask_mode:1; /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ uint8_t is_master:1; /* Edge/trigger selection. */ uint8_t elcr; /* Virtual INT output. */ uint8_t int_output; }; DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); /* * IO-APIC */ #define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ struct hvm_hw_vioapic { uint64_t base_address; uint32_t ioregsel; uint32_t id; union vioapic_redir_entry { uint64_t bits; struct { uint8_t vector; uint8_t delivery_mode:3; uint8_t dest_mode:1; uint8_t delivery_status:1; uint8_t polarity:1; uint8_t remote_irr:1; uint8_t trig_mode:1; uint8_t mask:1; uint8_t reserve:7; uint8_t reserved[4]; uint8_t dest_id; } fields; } redirtbl[VIOAPIC_NUM_PINS]; }; DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); /* * LAPIC */ struct hvm_hw_lapic { uint64_t apic_base_msr; uint32_t disabled; /* VLAPIC_xx_DISABLED */ uint32_t timer_divisor; uint64_t tdt_msr; }; DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); struct hvm_hw_lapic_regs { uint8_t data[1024]; }; DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); /* * IRQs */ struct hvm_hw_pci_irqs { /* * Virtual interrupt wires for a single PCI bus. * Indexed by: device*4 + INTx#. */ union { unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ uint64_t pad[2]; }; }; DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); struct hvm_hw_isa_irqs { /* * Virtual interrupt wires for ISA devices. * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). 
*/ union { unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ uint64_t pad[1]; }; }; DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); struct hvm_hw_pci_link { /* * PCI-ISA interrupt router. * Each PCI is 'wire-ORed' into one of four links using * the traditional 'barber's pole' mapping ((device + INTx#) & 3). * The router provides a programmable mapping from each link to a GSI. */ uint8_t route[4]; uint8_t pad0[4]; }; DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); /* * PIT */ struct hvm_hw_pit { struct hvm_hw_pit_channel { uint32_t count; /* can be 65536 */ uint16_t latched_count; uint8_t count_latched; uint8_t status_latched; uint8_t status; uint8_t read_state; uint8_t write_state; uint8_t write_latch; uint8_t rw_mode; uint8_t mode; uint8_t bcd; /* not supported */ uint8_t gate; /* timer start */ } channels[3]; /* 3 x 16 bytes */ uint32_t speaker_data_on; uint32_t pad0; }; DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); /* * RTC */ #define RTC_CMOS_SIZE 14 struct hvm_hw_rtc { /* CMOS bytes */ uint8_t cmos_data[RTC_CMOS_SIZE]; /* Index register for 2-part operations */ uint8_t cmos_index; uint8_t pad0; }; DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); /* * HPET */ #define HPET_TIMER_NUM 3 /* 3 timers supported now */ struct hvm_hw_hpet { /* Memory-mapped, software visible registers */ uint64_t capability; /* capabilities */ uint64_t res0; /* reserved */ uint64_t config; /* configuration */ uint64_t res1; /* reserved */ uint64_t isr; /* interrupt status reg */ uint64_t res2[25]; /* reserved */ uint64_t mc64; /* main counter */ uint64_t res3; /* reserved */ struct { /* timers */ uint64_t config; /* configuration/cap */ uint64_t cmp; /* comparator */ uint64_t fsb; /* FSB route, not supported now */ uint64_t res4; /* reserved */ } timers[HPET_TIMER_NUM]; uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ /* Hidden register state */ uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ }; 
DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); /* * PM timer */ struct hvm_hw_pmtimer { uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ }; DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); /* * MTRR MSRs */ struct hvm_hw_mtrr { #define MTRR_VCNT 8 #define NUM_FIXED_MSR 11 uint64_t msr_pat_cr; /* mtrr physbase & physmask msr pair*/ uint64_t msr_mtrr_var[MTRR_VCNT*2]; uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; uint64_t msr_mtrr_cap; uint64_t msr_mtrr_def_type; }; DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); /* * The save area of XSAVE/XRSTOR. */ struct hvm_hw_cpu_xsave { - uint64_t xfeature_mask; + uint64_t xfeature_mask; /* Ignored */ uint64_t xcr0; /* Updated by XSETBV */ uint64_t xcr0_accum; /* Updated by XSETBV */ struct { struct { char x[512]; } fpu_sse; struct { uint64_t xstate_bv; /* Updated by XRSTOR */ uint64_t reserved[7]; } xsave_hdr; /* The 64-byte header */ struct { char x[0]; } ymm; /* YMM */ } save_area; }; #define CPU_XSAVE_CODE 16 /* * Viridian hypervisor context. 
*/ struct hvm_viridian_domain_context { uint64_t hypercall_gpa; uint64_t guest_os_id; + uint64_t time_ref_count; + uint64_t reference_tsc; }; DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); struct hvm_viridian_vcpu_context { uint64_t apic_assist; }; DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context); struct hvm_vmce_vcpu { uint64_t caps; + uint64_t mci_ctl2_bank0; + uint64_t mci_ctl2_bank1; }; DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu); +struct hvm_tsc_adjust { + uint64_t tsc_adjust; +}; + +DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); + + +struct hvm_msr { + uint32_t count; + struct hvm_one_msr { + uint32_t index; + uint32_t _rsvd; + uint64_t val; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + } msr[]; +#elif defined(__GNUC__) + } msr[0]; +#else + } msr[1 /* variable size */]; +#endif +}; + +#define CPU_MSR_CODE 20 + /* * Largest type-code in use */ -#define HVM_SAVE_CODE_MAX 18 +#define HVM_SAVE_CODE_MAX 20 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Index: projects/clang370-import/sys/xen/interface/arch-x86/pmu.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/pmu.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/arch-x86/pmu.h (revision 288926) @@ -0,0 +1,167 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2015 Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_PMU_H__ +#define __XEN_PUBLIC_ARCH_X86_PMU_H__ + +/* x86-specific PMU definitions */ + +/* AMD PMU registers and structures */ +struct xen_pmu_amd_ctxt { + /* + * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd). + * For PV(H) guests these fields are RO. + */ + uint32_t counters; + uint32_t ctrls; + + /* Counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; +typedef struct xen_pmu_amd_ctxt xen_pmu_amd_ctxt_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmu_amd_ctxt_t); + +/* Intel PMU registers and structures */ +struct xen_pmu_cntr_pair { + uint64_t counter; + uint64_t control; +}; +typedef struct xen_pmu_cntr_pair xen_pmu_cntr_pair_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmu_cntr_pair_t); + +struct xen_pmu_intel_ctxt { + /* + * Offsets to fixed and architectural counter MSRs (relative to + * xen_pmu_arch.c.intel). + * For PV(H) guests these fields are RO. 
+ */ + uint32_t fixed_counters; + uint32_t arch_counters; + + /* PMU registers */ + uint64_t global_ctrl; + uint64_t global_ovf_ctrl; + uint64_t global_status; + uint64_t fixed_ctrl; + uint64_t ds_area; + uint64_t pebs_enable; + uint64_t debugctl; + + /* Fixed and architectural counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; +typedef struct xen_pmu_intel_ctxt xen_pmu_intel_ctxt_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmu_intel_ctxt_t); + +/* Sampled domain's registers */ +struct xen_pmu_regs { + uint64_t ip; + uint64_t sp; + uint64_t flags; + uint16_t cs; + uint16_t ss; + uint8_t cpl; + uint8_t pad[3]; +}; +typedef struct xen_pmu_regs xen_pmu_regs_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t); + +/* PMU flags */ +#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */ +#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */ +#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */ +#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */ + +/* + * Architecture-specific information describing state of the processor at + * the time of PMU interrupt. + * Fields of this structure marked as RW for guest should only be written by + * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the + * hypervisor during PMU interrupt). Hypervisor will read updated data in + * XENPMU_flush hypercall and clear PMU_CACHED bit. + */ +struct xen_pmu_arch { + union { + /* + * Processor's registers at the time of interrupt. + * WO for hypervisor, RO for guests. + */ + struct xen_pmu_regs regs; + /* Padding for adding new registers to xen_pmu_regs in the future */ +#define XENPMU_REGS_PAD_SZ 64 + uint8_t pad[XENPMU_REGS_PAD_SZ]; + } r; + + /* WO for hypervisor, RO for guest */ + uint64_t pmu_flags; + + /* + * APIC LVTPC register. + * RW for both hypervisor and guest. 
+ * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware + * during XENPMU_flush or XENPMU_lvtpc_set. + */ + union { + uint32_t lapic_lvtpc; + uint64_t pad; + } l; + + /* + * Vendor-specific PMU registers. + * RW for both hypervisor and guest (see exceptions above). + * Guest's updates to this field are verified and then loaded by the + * hypervisor into hardware during XENPMU_flush + */ + union { + struct xen_pmu_amd_ctxt amd; + struct xen_pmu_intel_ctxt intel; + + /* + * Padding for contexts (fixed parts only, does not include MSR banks + * that are specified by offsets) + */ +#define XENPMU_CTXT_PAD_SZ 128 + uint8_t pad[XENPMU_CTXT_PAD_SZ]; + } c; +}; +typedef struct xen_pmu_arch xen_pmu_arch_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmu_arch_t); + +#endif /* __XEN_PUBLIC_ARCH_X86_PMU_H__ */ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + Property changes on: projects/clang370-import/sys/xen/interface/arch-x86/pmu.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/arch-x86/xen-mca.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/xen-mca.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-x86/xen-mca.h (revision 288926) @@ -1,440 +1,440 @@ /****************************************************************************** * arch-x86/mca.h * * Contributed by Advanced Micro Devices, Inc. * Author: Christoph Egger * * Guest OS machine check interface to x86 Xen. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ /* Full MCA functionality has the following Usecases from the guest side: * * Must have's: * 1. Dom0 and DomU register machine check trap callback handlers * (already done via "set_trap_table" hypercall) * 2. Dom0 registers machine check event callback handler * (doable via EVTCHNOP_bind_virq) * 3. Dom0 and DomU fetches machine check data * 4. Dom0 wants Xen to notify a DomU * 5. Dom0 gets DomU ID from physical address * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy") * * Nice to have's: * 7. Dom0 wants Xen to deactivate a physical CPU * This is better done as separate task, physical CPU hotplugging, * and hypercall(s) should be sysctl's * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to * move a DomU (or Dom0 itself) away from a malicious page * producing correctable errors. * 9. offlining physical page: * Xen free's and never re-uses a certain physical page. * 10. 
Testfacility: Allow Dom0 to write values into machine check MSR's * and tell Xen to trigger a machine check */ #ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ #define __XEN_PUBLIC_ARCH_X86_MCA_H__ /* Hypercall */ #define __HYPERVISOR_mca __HYPERVISOR_arch_0 /* * The xen-unstable repo has interface version 0x03000001; out interface * is incompatible with that and any future minor revisions, so we * choose a different version number range that is numerically less * than that used in xen-unstable. */ #define XEN_MCA_INTERFACE_VERSION 0x01ecc003 /* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */ #define XEN_MC_NONURGENT 0x0001 /* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */ #define XEN_MC_URGENT 0x0002 /* IN: Dom0 acknowledges previosly-fetched telemetry */ #define XEN_MC_ACK 0x0004 /* OUT: All is ok */ #define XEN_MC_OK 0x0 /* OUT: Domain could not fetch data. */ #define XEN_MC_FETCHFAILED 0x1 /* OUT: There was no machine check data to fetch. */ #define XEN_MC_NODATA 0x2 /* OUT: Between notification time and this hypercall an other * (most likely) correctable error happened. The fetched data, * does not match the original machine check data. */ #define XEN_MC_NOMATCH 0x4 /* OUT: DomU did not register MC NMI handler. Try something else. */ #define XEN_MC_CANNOTHANDLE 0x8 /* OUT: Notifying DomU failed. Retry later or try something else. */ #define XEN_MC_NOTDELIVERED 0x10 /* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */ #ifndef __ASSEMBLY__ #define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ /* * Machine Check Architecure: * structs are read-only and used to report all kinds of * correctable and uncorrectable errors detected by the HW. * Dom0 and DomU: register a handler to get notified. 
* Dom0 only: Correctable errors are reported via VIRQ_MCA * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers */ #define MC_TYPE_GLOBAL 0 #define MC_TYPE_BANK 1 #define MC_TYPE_EXTENDED 2 #define MC_TYPE_RECOVERY 3 struct mcinfo_common { uint16_t type; /* structure type */ uint16_t size; /* size of this struct in bytes */ }; #define MC_FLAG_CORRECTABLE (1 << 0) #define MC_FLAG_UNCORRECTABLE (1 << 1) #define MC_FLAG_RECOVERABLE (1 << 2) #define MC_FLAG_POLLED (1 << 3) #define MC_FLAG_RESET (1 << 4) #define MC_FLAG_CMCI (1 << 5) #define MC_FLAG_MCE (1 << 6) /* contains global x86 mc information */ struct mcinfo_global { struct mcinfo_common common; /* running domain at the time in error (most likely the impacted one) */ uint16_t mc_domid; uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ uint16_t mc_core_threadid; /* core thread of physical core */ uint32_t mc_apicid; uint32_t mc_flags; uint64_t mc_gstatus; /* global status */ }; /* contains bank local x86 mc information */ struct mcinfo_bank { struct mcinfo_common common; uint16_t mc_bank; /* bank nr */ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0 * and if mc_addr is valid. Never valid on DomU. */ uint64_t mc_status; /* bank status */ uint64_t mc_addr; /* bank address, only valid * if addr bit is set in mc_status */ uint64_t mc_misc; uint64_t mc_ctrl2; uint64_t mc_tsc; }; struct mcinfo_msr { uint64_t reg; /* MSR */ uint64_t value; /* MSR value */ }; /* contains mc information from other * or additional mc MSRs */ struct mcinfo_extended { struct mcinfo_common common; /* You can fill up to five registers. * If you need more, then use this structure * multiple times. */ uint32_t mc_msrs; /* Number of msr with valid values. 
*/ /* * Currently Intel extended MSR (32/64) include all gp registers * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be * useful at present. So expand this array to 16/32 to leave room. */ struct mcinfo_msr mc_msr[sizeof(void *) * 4]; }; /* Recovery Action flags. Giving recovery result information to DOM0 */ /* Xen takes successful recovery action, the error is recovered */ #define REC_ACTION_RECOVERED (0x1 << 0) /* No action is performed by XEN */ #define REC_ACTION_NONE (0x1 << 1) /* It's possible DOM0 might take action ownership in some case */ #define REC_ACTION_NEED_RESET (0x1 << 2) /* Different Recovery Action types, if the action is performed successfully, * REC_ACTION_RECOVERED flag will be returned. */ /* Page Offline Action */ #define MC_ACTION_PAGE_OFFLINE (0x1 << 0) /* CPU offline Action */ #define MC_ACTION_CPU_OFFLINE (0x1 << 1) /* L3 cache disable Action */ #define MC_ACTION_CACHE_SHRINK (0x1 << 2) /* Below interface used between XEN/DOM0 for passing XEN's recovery action * information to DOM0. * usage Senario: After offlining broken page, XEN might pass its page offline * recovery action result to DOM0. DOM0 will save the information in * non-volatile memory for further proactive actions, such as offlining the * easy broken page earlier when doing next reboot. 
*/ struct page_offline_action { /* Params for passing the offlined page number to DOM0 */ uint64_t mfn; uint64_t status; }; struct cpu_offline_action { /* Params for passing the identity of the offlined CPU to DOM0 */ uint32_t mc_socketid; uint16_t mc_coreid; uint16_t mc_core_threadid; }; #define MAX_UNION_SIZE 16 struct mcinfo_recovery { struct mcinfo_common common; uint16_t mc_bank; /* bank nr */ uint8_t action_flags; uint8_t action_types; union { struct page_offline_action page_retire; struct cpu_offline_action cpu_offline; uint8_t pad[MAX_UNION_SIZE]; } action_info; }; #define MCINFO_HYPERCALLSIZE 1024 #define MCINFO_MAXSIZE 768 #define MCINFO_FLAGS_UNCOMPLETE 0x1 struct mc_info { /* Number of mcinfo_* entries in mi_data */ uint32_t mi_nentries; uint32_t flags; uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; }; typedef struct mc_info mc_info_t; DEFINE_XEN_GUEST_HANDLE(mc_info_t); #define __MC_MSR_ARRAYSIZE 8 #define __MC_NMSRS 1 #define MC_NCAPS 7 /* 7 CPU feature flag words */ #define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ #define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ #define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ #define MC_CAPS_LINUX 3 /* Linux-defined */ #define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ #define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ #define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ struct mcinfo_logical_cpu { uint32_t mc_cpunr; uint32_t mc_chipid; uint16_t mc_coreid; uint16_t mc_threadid; uint32_t mc_apicid; uint32_t mc_clusterid; uint32_t mc_ncores; uint32_t mc_ncores_active; uint32_t mc_nthreads; int32_t mc_cpuid_level; uint32_t mc_family; uint32_t mc_vendor; uint32_t mc_model; uint32_t mc_step; char mc_vendorid[16]; char mc_brandid[64]; uint32_t mc_cpu_caps[MC_NCAPS]; uint32_t mc_cache_size; uint32_t mc_cache_alignment; int32_t mc_nmsrvals; struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; }; typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; 
DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); /* * OS's should use these instead of writing their own lookup function * each with its own bugs and drawbacks. * We use macros instead of static inline functions to allow guests * to include this header in assembly files (*.S). */ /* Prototype: * uint32_t x86_mcinfo_nentries(struct mc_info *mi); */ #define x86_mcinfo_nentries(_mi) \ (_mi)->mi_nentries /* Prototype: * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); */ #define x86_mcinfo_first(_mi) \ ((struct mcinfo_common *)(_mi)->mi_data) /* Prototype: * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); */ #define x86_mcinfo_next(_mic) \ ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) /* Prototype: * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); */ #define x86_mcinfo_lookup(_ret, _mi, _type) \ do { \ uint32_t found, i; \ struct mcinfo_common *_mic; \ \ found = 0; \ (_ret) = NULL; \ if (_mi == NULL) break; \ _mic = x86_mcinfo_first(_mi); \ for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ if (_mic->type == (_type)) { \ found = 1; \ break; \ } \ _mic = x86_mcinfo_next(_mic); \ } \ (_ret) = found ? _mic : NULL; \ } while (0) /* Usecase 1 * Register machine check trap callback handler * (already done via "set_trap_table" hypercall) */ /* Usecase 2 * Dom0 registers machine check event callback handler * done by EVTCHNOP_bind_virq */ /* Usecase 3 * Fetch machine check data from hypervisor. * Note, this hypercall is special, because both Dom0 and DomU must use this. */ #define XEN_MC_fetch 1 struct xen_mc_fetch { /* IN/OUT variables. */ uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, XEN_MC_ACK if ack'ing an earlier fetch */ /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */ uint32_t _pad0; uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ /* OUT variables. 
*/ XEN_GUEST_HANDLE(mc_info_t) data; }; typedef struct xen_mc_fetch xen_mc_fetch_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); /* Usecase 4 * This tells the hypervisor to notify a DomU about the machine check error */ #define XEN_MC_notifydomain 2 struct xen_mc_notifydomain { /* IN variables. */ uint16_t mc_domid; /* The unprivileged domain to notify. */ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. * Usually echo'd value from the fetch hypercall. */ /* IN/OUT variables. */ uint32_t flags; /* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ /* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ }; typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); #define XEN_MC_physcpuinfo 3 struct xen_mc_physcpuinfo { /* IN/OUT */ uint32_t ncpus; uint32_t _pad0; /* OUT */ XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; }; #define XEN_MC_msrinject 4 #define MC_MSRINJ_MAXMSRS 8 struct xen_mc_msrinject { /* IN */ uint32_t mcinj_cpunr; /* target processor id */ uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ uint32_t mcinj_count; /* 0 .. 
count-1 in array are valid */ uint32_t _pad0; struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; }; /* Flags for mcinj_flags above; bits 16-31 are reserved */ #define MC_MSRINJ_F_INTERPOSE 0x1 #define XEN_MC_mceinject 5 struct xen_mc_mceinject { unsigned int mceinj_cpunr; /* target processor id */ }; #if defined(__XEN__) || defined(__XEN_TOOLS__) #define XEN_MC_inject_v2 6 #define XEN_MC_INJECT_TYPE_MASK 0x7 #define XEN_MC_INJECT_TYPE_MCE 0x0 #define XEN_MC_INJECT_TYPE_CMCI 0x1 #define XEN_MC_INJECT_CPU_BROADCAST 0x8 struct xen_mc_inject_v2 { uint32_t flags; - struct xenctl_cpumap cpumap; + struct xenctl_bitmap cpumap; }; #endif struct xen_mc { uint32_t cmd; uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ union { struct xen_mc_fetch mc_fetch; struct xen_mc_notifydomain mc_notifydomain; struct xen_mc_physcpuinfo mc_physcpuinfo; struct xen_mc_msrinject mc_msrinject; struct xen_mc_mceinject mc_mceinject; #if defined(__XEN__) || defined(__XEN_TOOLS__) struct xen_mc_inject_v2 mc_inject_v2; #endif } u; }; typedef struct xen_mc xen_mc_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_t); #endif /* __ASSEMBLY__ */ #endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ Index: projects/clang370-import/sys/xen/interface/arch-x86/xen-x86_32.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/xen-x86_32.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-x86/xen-x86_32.h (revision 288926) @@ -1,171 +1,172 @@ /****************************************************************************** * xen-x86_32.h * * Guest OS interface to x86 32-bit Xen. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2004-2007, K A Fraser */ #ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ #define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ /* * Hypercall interface: * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6) * Output: %eax * Access is via hypercall page (set up by guest loader or via a Xen MSR): * call hypercall_page + hypercall-number * 32 * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) */ /* * These flat segments are in the Xen-private section of every GDT. Since these * are also present in the initial GDT, many OSes will be able to avoid * installing their own GDT. 
*/ #define FLAT_RING1_CS 0xe019 /* GDT index 259 */ #define FLAT_RING1_DS 0xe021 /* GDT index 260 */ #define FLAT_RING1_SS 0xe021 /* GDT index 260 */ #define FLAT_RING3_CS 0xe02b /* GDT index 261 */ #define FLAT_RING3_DS 0xe033 /* GDT index 262 */ #define FLAT_RING3_SS 0xe033 /* GDT index 262 */ #define FLAT_KERNEL_CS FLAT_RING1_CS #define FLAT_KERNEL_DS FLAT_RING1_DS #define FLAT_KERNEL_SS FLAT_RING1_SS #define FLAT_USER_CS FLAT_RING3_CS #define FLAT_USER_DS FLAT_RING3_DS #define FLAT_USER_SS FLAT_RING3_SS #define __HYPERVISOR_VIRT_START_PAE 0xF5800000 #define __MACH2PHYS_VIRT_START_PAE 0xF5800000 #define __MACH2PHYS_VIRT_END_PAE 0xF6800000 #define HYPERVISOR_VIRT_START_PAE \ mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) #define MACH2PHYS_VIRT_START_PAE \ mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) #define MACH2PHYS_VIRT_END_PAE \ mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) /* Non-PAE bounds are obsolete. */ #define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 #define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 #define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 #define HYPERVISOR_VIRT_START_NONPAE \ mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) #define MACH2PHYS_VIRT_START_NONPAE \ mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) #define MACH2PHYS_VIRT_END_NONPAE \ mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) #define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE #define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE #define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE #ifndef HYPERVISOR_VIRT_START #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif #define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) #define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) #endif /* 32-/64-bit invariability for control interfaces (domctl/sysctl). 
*/ #if defined(__XEN__) || defined(__XEN_TOOLS__) #undef ___DEFINE_XEN_GUEST_HANDLE #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } \ __guest_handle_ ## name; \ typedef struct { union { type *p; uint64_aligned_t q; }; } \ __guest_handle_64_ ## name #undef set_xen_guest_handle_raw #define set_xen_guest_handle_raw(hnd, val) \ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ (hnd).p = val; \ } while ( 0 ) +#define int64_aligned_t int64_t __attribute__((aligned(8))) #define uint64_aligned_t uint64_t __attribute__((aligned(8))) #define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name #define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) #endif #ifndef __ASSEMBLY__ struct cpu_user_regs { uint32_t ebx; uint32_t ecx; uint32_t edx; uint32_t esi; uint32_t edi; uint32_t ebp; uint32_t eax; uint16_t error_code; /* private */ uint16_t entry_vector; /* private */ uint32_t eip; uint16_t cs; uint8_t saved_upcall_mask; uint8_t _pad0; uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ uint32_t esp; uint16_t ss, _pad1; uint16_t es, _pad2; uint16_t ds, _pad3; uint16_t fs, _pad4; uint16_t gs, _pad5; }; typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); /* * Page-directory addresses above 4GB do not fit into architectural %cr3. * When accessing %cr3, or equivalent field in vcpu_guest_context, guests * must use the following accessor macros to pack/unpack valid MFNs. 
*/ #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) struct arch_vcpu_info { unsigned long cr2; unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ }; typedef struct arch_vcpu_info arch_vcpu_info_t; struct xen_callback { unsigned long cs; unsigned long eip; }; typedef struct xen_callback xen_callback_t; #endif /* !__ASSEMBLY__ */ #endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/arch-x86/xen-x86_64.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/xen-x86_64.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-x86/xen-x86_64.h (revision 288926) @@ -1,202 +1,202 @@ /****************************************************************************** * xen-x86_64.h * * Guest OS interface to x86 64-bit Xen. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2004-2006, K A Fraser */ #ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ #define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ /* * Hypercall interface: * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) * Output: %rax * Access is via hypercall page (set up by guest loader or via a Xen MSR): * call hypercall_page + hypercall-number * 32 * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) */ /* * 64-bit segment selectors * These flat segments are in the Xen-private section of every GDT. Since these * are also present in the initial GDT, many OSes will be able to avoid * installing their own GDT. */ #define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ #define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ #define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ #define FLAT_RING3_DS64 0x0000 /* NULL selector */ #define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ #define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ #define FLAT_KERNEL_DS64 FLAT_RING3_DS64 #define FLAT_KERNEL_DS32 FLAT_RING3_DS32 #define FLAT_KERNEL_DS FLAT_KERNEL_DS64 #define FLAT_KERNEL_CS64 FLAT_RING3_CS64 #define FLAT_KERNEL_CS32 FLAT_RING3_CS32 #define FLAT_KERNEL_CS FLAT_KERNEL_CS64 #define FLAT_KERNEL_SS64 FLAT_RING3_SS64 #define FLAT_KERNEL_SS32 FLAT_RING3_SS32 #define FLAT_KERNEL_SS FLAT_KERNEL_SS64 #define FLAT_USER_DS64 FLAT_RING3_DS64 #define FLAT_USER_DS32 FLAT_RING3_DS32 #define FLAT_USER_DS FLAT_USER_DS64 #define FLAT_USER_CS64 FLAT_RING3_CS64 #define FLAT_USER_CS32 FLAT_RING3_CS32 #define FLAT_USER_CS FLAT_USER_CS64 #define FLAT_USER_SS64 FLAT_RING3_SS64 #define FLAT_USER_SS32 FLAT_RING3_SS32 #define FLAT_USER_SS FLAT_USER_SS64 #define __HYPERVISOR_VIRT_START 0xFFFF800000000000 #define 
__HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 #ifndef HYPERVISOR_VIRT_START #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) #endif #define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) #define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) #endif /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) * @which == SEGBASE_* ; @base == 64-bit base address * Returns 0 on success. */ #define SEGBASE_FS 0 #define SEGBASE_GS_USER 1 #define SEGBASE_GS_KERNEL 2 #define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ /* * int HYPERVISOR_iret(void) * All arguments are on the kernel stack, in the following format. * Never returns if successful. Current kernel context is lost. * The saved CS is mapped as follows: * RING0 -> RING3 kernel mode. * RING1 -> RING3 kernel mode. * RING2 -> RING3 kernel mode. * RING3 -> RING3 user mode. * However RING0 indicates that the guest kernel should return to iteself * directly with * orb $3,1*8(%rsp) * iretq * If flags contains VGCF_in_syscall: * Restore RAX, RIP, RFLAGS, RSP. * Discard R11, RCX, CS, SS. * Otherwise: * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. * All other registers are saved on hypercall entry and restored to user. */ /* Guest exited in SYSCALL context? Return to guest with SYSRET? */ #define _VGCF_in_syscall 8 #define VGCF_in_syscall (1<<_VGCF_in_syscall) #define VGCF_IN_SYSCALL VGCF_in_syscall #ifndef __ASSEMBLY__ struct iret_context { /* Top of stack (%rsp at point of hypercall). */ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; /* Bottom of iret stack frame. 
*/ }; #if defined(__GNUC__) && !defined(__STRICT_ANSI__) /* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ #define __DECL_REG(name) union { \ uint64_t r ## name, e ## name; \ uint32_t _e ## name; \ } #else /* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ #define __DECL_REG(name) uint64_t r ## name #endif struct cpu_user_regs { uint64_t r15; uint64_t r14; uint64_t r13; uint64_t r12; __DECL_REG(bp); __DECL_REG(bx); uint64_t r11; uint64_t r10; uint64_t r9; uint64_t r8; __DECL_REG(ax); __DECL_REG(cx); __DECL_REG(dx); __DECL_REG(si); __DECL_REG(di); uint32_t error_code; /* private */ uint32_t entry_vector; /* private */ __DECL_REG(ip); uint16_t cs, _pad0[1]; uint8_t saved_upcall_mask; uint8_t _pad1[3]; __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ __DECL_REG(sp); uint16_t ss, _pad2[3]; uint16_t es, _pad3[3]; uint16_t ds, _pad4[3]; uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. 
*/ }; typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); #undef __DECL_REG #define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) #define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) struct arch_vcpu_info { unsigned long cr2; unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ }; typedef struct arch_vcpu_info arch_vcpu_info_t; typedef unsigned long xen_callback_t; #endif /* !__ASSEMBLY__ */ #endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/arch-x86/xen.h =================================================================== --- projects/clang370-import/sys/xen/interface/arch-x86/xen.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/arch-x86/xen.h (revision 288926) @@ -1,237 +1,317 @@ /****************************************************************************** * arch-x86/xen.h * * Guest OS interface to x86 Xen. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2004-2006, K A Fraser */ #include "../xen.h" #ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ #define __XEN_PUBLIC_ARCH_X86_XEN_H__ /* Structural guest handles introduced in 0x00030201. */ #if __XEN_INTERFACE_VERSION__ >= 0x00030201 #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name #else #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef type * __guest_handle_ ## name #endif +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. + * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an + * hypercall argument. + * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but + * they might not be on other architectures. 
+ */ #define __DEFINE_XEN_GUEST_HANDLE(name, type) \ ___DEFINE_XEN_GUEST_HANDLE(name, type); \ ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name) #define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #if defined(__i386__) #include "xen-x86_32.h" #elif defined(__x86_64__) #include "xen-x86_64.h" #endif #ifndef __ASSEMBLY__ typedef unsigned long xen_pfn_t; #define PRI_xen_pfn "lx" #endif +#define XEN_HAVE_PV_GUEST_ENTRY 1 + +#define XEN_HAVE_PV_UPCALL_MASK 1 + /* - * SEGMENT DESCRIPTOR TABLES + * `incontents 200 segdesc Segment Descriptor Tables */ /* * ` enum neg_errnoval * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries); * ` */ /* * A number of GDT entries are reserved by Xen. These are not situated at the * start of the GDT because some stupid OSes export hard-coded selector values * in their ABI. These hard-coded values are always near the start of the GDT, * so Xen places itself out of the way, at the far end of the GDT. + * + * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op */ #define FIRST_RESERVED_GDT_PAGE 14 #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc); + * ` + * ` @pa The machine physical address of the descriptor to + * ` update. Must be either a descriptor page or writable. + * ` @desc The descriptor value to update, in the same format as a + * ` native descriptor table entry. 
+ */ + /* Maximum number of virtual CPUs in legacy multi-processor guests. */ #define XEN_LEGACY_MAX_VCPUS 32 #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; +#define PRI_xen_ulong "lx" /* * ` enum neg_errnoval * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp); * ` * Sets the stack segment and pointer for the current vcpu. */ /* * ` enum neg_errnoval * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]); * ` */ /* * Send an array of these to HYPERVISOR_set_trap_table(). * Terminate the array with a sentinel entry, with traps[].address==0. * The privilege level specifies which modes may enter a trap via a software * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate * privilege levels as follows: * Level == 0: Noone may enter * Level == 1: Kernel may enter * Level == 2: Kernel may enter * Level == 3: Everyone may enter */ #define TI_GET_DPL(_ti) ((_ti)->flags & 3) #define TI_GET_IF(_ti) ((_ti)->flags & 4) #define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ uint16_t cs; /* code selector */ unsigned long address; /* code offset */ }; typedef struct trap_info trap_info_t; DEFINE_XEN_GUEST_HANDLE(trap_info_t); typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + * + * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise + * for HVM and PVH guests, not all information in this structure is updated: + * + * - For HVM guests, the structures read include: fpu_ctxt (if + * VGCF_I387_VALID is set), flags, user_regs, debugreg[*] + * + * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to + * set cr3. All other fields not used should be set to 0.
*/ struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ #define VGCF_I387_VALID (1<<0) #define VGCF_IN_KERNEL (1<<2) #define _VGCF_i387_valid 0 #define VGCF_i387_valid (1<<_VGCF_i387_valid) #define _VGCF_in_kernel 2 #define VGCF_in_kernel (1<<_VGCF_in_kernel) #define _VGCF_failsafe_disables_events 3 #define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) #define _VGCF_syscall_disables_events 4 #define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) #define _VGCF_online 5 #define VGCF_online (1<<_VGCF_online) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ #ifdef __i386__ unsigned long event_callback_cs; /* CS:EIP of event callback */ unsigned long event_callback_eip; unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ unsigned long failsafe_callback_eip; #else unsigned long event_callback_eip; unsigned long failsafe_callback_eip; #ifdef __XEN__ union { unsigned long syscall_callback_eip; struct { unsigned int event_callback_cs; /* compat CS of event cb */ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ }; }; #else unsigned long syscall_callback_eip; #endif #endif unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ #ifdef __x86_64__ /* Segment base addresses. 
*/ uint64_t fs_base; uint64_t gs_base_kernel; uint64_t gs_base_user; #endif }; typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + unsigned long max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it has + * been set to invalid e.g. due to the p2m being too large for the 3-level + * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr + * is to be used. + */ xen_pfn_t pfn_to_mfn_frame_list_list; unsigned long nmi_reason; - uint64_t pad[32]; + /* + * Following three fields are valid if p2m_cr3 contains a value different + * from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register state + * and holds the folded machine frame number (via xen_pfn_to_cr3) of a + * L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All entries + * in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 and + * a value with the least significant bit set indicates that a mapping + * update is in progress. This allows guest external software (e.g. in Dom0) + * to verify that read mappings are consistent and whether they have changed + * since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an atomic + * write only. 
+ */ + unsigned long p2m_cr3; /* cr3 value of the p2m address space */ + unsigned long p2m_vaddr; /* virtual address of the p2m list */ + unsigned long p2m_generation; /* generation count of p2m mapping */ +#ifdef __i386__ + /* There's no room for this field in the generic structure. */ + uint32_t wc_sec_hi; +#endif }; typedef struct arch_shared_info arch_shared_info_t; +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* + * struct xen_arch_domainconfig's ABI is covered by + * XEN_DOMCTL_INTERFACE_VERSION. + */ +struct xen_arch_domainconfig { + char dummy; +}; +#endif + #endif /* !__ASSEMBLY__ */ /* * ` enum neg_errnoval * ` HYPERVISOR_fpu_taskswitch(int set); * ` * Sets (if set!=0) or clears (if set==0) CR0.TS. */ /* * ` enum neg_errnoval * ` HYPERVISOR_set_debugreg(int regno, unsigned long value); * * ` unsigned long * ` HYPERVISOR_get_debugreg(int regno); * For 0<=reg<=7, returns the debug register value. * For other values of reg, returns ((unsigned long)-EINVAL). * (Unfortunately, this interface is defective.) */ /* * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ #ifdef __ASSEMBLY__ #define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; #define XEN_CPUID XEN_EMULATE_PREFIX cpuid #else #define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif #endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/callback.h =================================================================== --- projects/clang370-import/sys/xen/interface/callback.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/callback.h (revision 288926) @@ -1,121 +1,121 @@ /****************************************************************************** * callback.h * * Register guest OS callbacks with Xen. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2006, Ian Campbell */ #ifndef __XEN_PUBLIC_CALLBACK_H__ #define __XEN_PUBLIC_CALLBACK_H__ #include "xen.h" /* * Prototype for this hypercall is: * long callback_op(int cmd, void *extra_args) * @cmd == CALLBACKOP_??? (callback operation). * @extra_args == Operation-specific extra arguments (NULL if none). */ -/* ia64, x86: Callback for event delivery. */ +/* x86: Callback for event delivery. */ #define CALLBACKTYPE_event 0 /* x86: Failsafe callback when guest state cannot be restored by Xen. */ #define CALLBACKTYPE_failsafe 1 /* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ #define CALLBACKTYPE_syscall 2 /* * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel * feature is enabled. Do not use this callback type in new code. */ #define CALLBACKTYPE_sysenter_deprecated 3 /* x86: Callback for NMI delivery. 
*/ #define CALLBACKTYPE_nmi 4 /* * x86: sysenter is only available as follows: * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs * ('32-on-32-on-64', '32-on-64-on-64') * [nb. also 64-bit guest applications on Intel CPUs * ('64-on-64-on-64'), but syscall is preferred] */ #define CALLBACKTYPE_sysenter 5 /* * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs * ('32-on-32-on-64', '32-on-64-on-64') */ #define CALLBACKTYPE_syscall32 7 /* * Disable event deliver during callback? This flag is ignored for event and * NMI callbacks: event delivery is unconditionally disabled. */ #define _CALLBACKF_mask_events 0 #define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) /* * Register a callback. */ #define CALLBACKOP_register 0 struct callback_register { uint16_t type; uint16_t flags; xen_callback_t address; }; typedef struct callback_register callback_register_t; DEFINE_XEN_GUEST_HANDLE(callback_register_t); /* * Unregister a callback. * * Not all callbacks can be unregistered. -EINVAL will be returned if * you attempt to unregister such a callback. 
*/ #define CALLBACKOP_unregister 1 struct callback_unregister { uint16_t type; uint16_t _unused; }; typedef struct callback_unregister callback_unregister_t; DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); #if __XEN_INTERFACE_VERSION__ < 0x00030207 #undef CALLBACKTYPE_sysenter #define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated #endif #endif /* __XEN_PUBLIC_CALLBACK_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/dom0_ops.h =================================================================== --- projects/clang370-import/sys/xen/interface/dom0_ops.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/dom0_ops.h (revision 288926) @@ -1,120 +1,120 @@ /****************************************************************************** * dom0_ops.h * * Process command requests from domain-0 guest OS. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2002-2003, B Dragovic * Copyright (c) 2002-2006, K Fraser */ #ifndef __XEN_PUBLIC_DOM0_OPS_H__ #define __XEN_PUBLIC_DOM0_OPS_H__ #include "xen.h" #include "platform.h" #if __XEN_INTERFACE_VERSION__ >= 0x00030204 #error "dom0_ops.h is a compatibility interface only" #endif #define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION #define DOM0_SETTIME XENPF_settime #define dom0_settime xenpf_settime #define dom0_settime_t xenpf_settime_t #define DOM0_ADD_MEMTYPE XENPF_add_memtype #define dom0_add_memtype xenpf_add_memtype #define dom0_add_memtype_t xenpf_add_memtype_t #define DOM0_DEL_MEMTYPE XENPF_del_memtype #define dom0_del_memtype xenpf_del_memtype #define dom0_del_memtype_t xenpf_del_memtype_t #define DOM0_READ_MEMTYPE XENPF_read_memtype #define dom0_read_memtype xenpf_read_memtype #define dom0_read_memtype_t xenpf_read_memtype_t #define DOM0_MICROCODE XENPF_microcode_update #define dom0_microcode xenpf_microcode_update #define dom0_microcode_t xenpf_microcode_update_t #define DOM0_PLATFORM_QUIRK XENPF_platform_quirk #define dom0_platform_quirk xenpf_platform_quirk #define dom0_platform_quirk_t xenpf_platform_quirk_t typedef uint64_t cpumap_t; /* Unsupported legacy operation -- defined for API compatibility. */ #define DOM0_MSR 15 struct dom0_msr { /* IN variables. */ uint32_t write; cpumap_t cpu_mask; uint32_t msr; uint32_t in1; uint32_t in2; /* OUT variables. */ uint32_t out1; uint32_t out2; }; typedef struct dom0_msr dom0_msr_t; DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); /* Unsupported legacy operation -- defined for API compatibility. 
*/ #define DOM0_PHYSICAL_MEMORY_MAP 40 struct dom0_memory_map_entry { uint64_t start, end; uint32_t flags; /* reserved */ uint8_t is_ram; }; typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); struct dom0_op { uint32_t cmd; uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ union { struct dom0_msr msr; struct dom0_settime settime; struct dom0_add_memtype add_memtype; struct dom0_del_memtype del_memtype; struct dom0_read_memtype read_memtype; struct dom0_microcode microcode; struct dom0_platform_quirk platform_quirk; struct dom0_memory_map_entry physical_memory_map; uint8_t pad[128]; } u; }; typedef struct dom0_op dom0_op_t; DEFINE_XEN_GUEST_HANDLE(dom0_op_t); #endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/domctl.h =================================================================== --- projects/clang370-import/sys/xen/interface/domctl.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/domctl.h (revision 288926) @@ -1,991 +1,1220 @@ /****************************************************************************** * domctl.h * * Domain management operations. For use by node control stack. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2002-2003, B Dragovic * Copyright (c) 2002-2006, K Fraser */ #ifndef __XEN_PUBLIC_DOMCTL_H__ #define __XEN_PUBLIC_DOMCTL_H__ #if !defined(__XEN__) && !defined(__XEN_TOOLS__) #error "domctl operations are intended for use by node control tools only" #endif #include "xen.h" #include "grant_table.h" +#include "hvm/save.h" +#include "memory.h" -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000008 +#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000b /* * NB. xen_domctl.domain is an IN/OUT parameter for this operation. * If it is specified as zero, an id is auto-allocated and returned. */ /* XEN_DOMCTL_createdomain */ struct xen_domctl_createdomain { /* IN parameters */ uint32_t ssidref; xen_domain_handle_t handle; - /* Is this an HVM guest (as opposed to a PV guest)? */ + /* Is this an HVM guest (as opposed to a PVH or PV guest)? */ #define _XEN_DOMCTL_CDF_hvm_guest 0 #define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) /* Use hardware-assisted paging if available? */ #define _XEN_DOMCTL_CDF_hap 1 #define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) /* Should domain memory integrity be verifed by tboot during Sx? */ #define _XEN_DOMCTL_CDF_s3_integrity 2 #define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity) /* Disable out-of-sync shadow page tables? */ #define _XEN_DOMCTL_CDF_oos_off 3 #define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off) + /* Is this a PVH guest (as opposed to an HVM or PV guest)? 
*/ +#define _XEN_DOMCTL_CDF_pvh_guest 4 +#define XEN_DOMCTL_CDF_pvh_guest (1U<<_XEN_DOMCTL_CDF_pvh_guest) uint32_t flags; + struct xen_arch_domainconfig config; }; typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); /* XEN_DOMCTL_getdomaininfo */ struct xen_domctl_getdomaininfo { /* OUT variables. */ domid_t domain; /* Also echoed in domctl.domain */ /* Domain is scheduled to die. */ #define _XEN_DOMINF_dying 0 #define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying) /* Domain is an HVM guest (as opposed to a PV guest). */ #define _XEN_DOMINF_hvm_guest 1 #define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest) /* The guest OS has shut down. */ #define _XEN_DOMINF_shutdown 2 #define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown) /* Currently paused by control software. */ #define _XEN_DOMINF_paused 3 #define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused) /* Currently blocked pending an event. */ #define _XEN_DOMINF_blocked 4 #define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked) /* Domain is currently running. */ #define _XEN_DOMINF_running 5 #define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) /* Being debugged. */ #define _XEN_DOMINF_debugged 6 #define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) +/* domain is PVH */ +#define _XEN_DOMINF_pvh_guest 7 +#define XEN_DOMINF_pvh_guest (1U<<_XEN_DOMINF_pvh_guest) /* XEN_DOMINF_shutdown guest-supplied code. */ #define XEN_DOMINF_shutdownmask 255 #define XEN_DOMINF_shutdownshift 16 uint32_t flags; /* XEN_DOMINF_* */ uint64_aligned_t tot_pages; uint64_aligned_t max_pages; + uint64_aligned_t outstanding_pages; uint64_aligned_t shr_pages; uint64_aligned_t paged_pages; uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ uint64_aligned_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ +#define XEN_INVALID_MAX_VCPU_ID (~0U) /* Domain has no vcpus? */ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. 
*/ uint32_t ssidref; xen_domain_handle_t handle; uint32_t cpupool; }; typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); /* XEN_DOMCTL_getmemlist */ struct xen_domctl_getmemlist { /* IN variables. */ /* Max entries to write to output buffer. */ uint64_aligned_t max_pfns; /* Start index in guest's page list. */ uint64_aligned_t start_pfn; XEN_GUEST_HANDLE_64(uint64) buffer; /* OUT variables. */ uint64_aligned_t num_pfns; }; typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); /* XEN_DOMCTL_getpageframeinfo */ #define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 #define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) #define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) #define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) #define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) #define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) #define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) #define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ #define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */ -#define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28) +#define XEN_DOMCTL_PFINFO_BROKEN (0xdU<<28) /* broken page */ #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) -struct xen_domctl_getpageframeinfo { - /* IN variables. */ - uint64_aligned_t gmfn; /* GMFN to query */ - /* OUT variables. */ - /* Is the page PINNED to a type? */ - uint32_t type; /* see above type defs */ -}; -typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; -DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); - - -/* XEN_DOMCTL_getpageframeinfo2 */ -struct xen_domctl_getpageframeinfo2 { - /* IN variables. */ - uint64_aligned_t num; - /* IN/OUT variables. 
*/ - XEN_GUEST_HANDLE_64(uint32) array; -}; -typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; -DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); - /* XEN_DOMCTL_getpageframeinfo3 */ struct xen_domctl_getpageframeinfo3 { /* IN variables. */ uint64_aligned_t num; /* IN/OUT variables. */ XEN_GUEST_HANDLE_64(xen_pfn_t) array; }; /* * Control shadow pagetables operation */ /* XEN_DOMCTL_shadow_op */ /* Disable shadow mode. */ #define XEN_DOMCTL_SHADOW_OP_OFF 0 /* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */ #define XEN_DOMCTL_SHADOW_OP_ENABLE 32 /* Log-dirty bitmap operations. */ /* Return the bitmap and clean internal copy for next round. */ #define XEN_DOMCTL_SHADOW_OP_CLEAN 11 /* Return the bitmap but do not modify internal copy. */ #define XEN_DOMCTL_SHADOW_OP_PEEK 12 /* Memory allocation accessors. */ #define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30 #define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31 /* Legacy enable operations. */ /* Equiv. to ENABLE with no mode flags. */ #define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ #define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ #define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 /* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ /* * Shadow pagetables are refcounted: guest does not use explicit mmu * operations nor write-protect its pagetables. */ #define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1) /* * Log pages in a bitmap as they are dirtied. * Used for live relocation to determine which pages must be re-sent. */ #define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2) /* * Automatically translate GPFNs into MFNs. */ #define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3) /* * Xen does not steal virtual address space from the guest. * Requires HVM support. 
*/ #define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) struct xen_domctl_shadow_op_stats { uint32_t fault_count; uint32_t dirty_count; }; typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t); struct xen_domctl_shadow_op { /* IN variables. */ uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ /* OP_ENABLE */ uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ uint32_t mb; /* Shadow memory allocation in MB */ /* OP_PEEK / OP_CLEAN */ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ struct xen_domctl_shadow_op_stats stats; }; typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); /* XEN_DOMCTL_max_mem */ struct xen_domctl_max_mem { /* IN variables. */ uint64_aligned_t max_memkb; }; typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); /* XEN_DOMCTL_setvcpucontext */ /* XEN_DOMCTL_getvcpucontext */ struct xen_domctl_vcpucontext { uint32_t vcpu; /* IN */ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ }; typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); /* XEN_DOMCTL_getvcpuinfo */ struct xen_domctl_getvcpuinfo { /* IN variables. */ uint32_t vcpu; /* OUT variables. */ uint8_t online; /* currently online (not hotplugged)? */ uint8_t blocked; /* blocked waiting for an event? */ uint8_t running; /* currently scheduled on its CPU? */ uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ uint32_t cpu; /* current mapping */ }; typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); +/* Get/set the NUMA node(s) with which the guest has affinity with. 
*/ +/* XEN_DOMCTL_setnodeaffinity */ +/* XEN_DOMCTL_getnodeaffinity */ +struct xen_domctl_nodeaffinity { + struct xenctl_bitmap nodemap;/* IN */ +}; +typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t); + + /* Get/set which physical cpus a vcpu can execute on. */ /* XEN_DOMCTL_setvcpuaffinity */ /* XEN_DOMCTL_getvcpuaffinity */ struct xen_domctl_vcpuaffinity { - uint32_t vcpu; /* IN */ - struct xenctl_cpumap cpumap; /* IN/OUT */ + /* IN variables. */ + uint32_t vcpu; + /* Set/get the hard affinity for vcpu */ +#define _XEN_VCPUAFFINITY_HARD 0 +#define XEN_VCPUAFFINITY_HARD (1U<<_XEN_VCPUAFFINITY_HARD) + /* Set/get the soft affinity for vcpu */ +#define _XEN_VCPUAFFINITY_SOFT 1 +#define XEN_VCPUAFFINITY_SOFT (1U<<_XEN_VCPUAFFINITY_SOFT) + uint32_t flags; + /* + * IN/OUT variables. + * + * Both are IN/OUT for XEN_DOMCTL_setvcpuaffinity, in which case they + * contain effective hard or/and soft affinity. That is, upon successful + * return, cpumap_soft, contains the intersection of the soft affinity, + * hard affinity and the cpupool's online CPUs for the domain (if + * XEN_VCPUAFFINITY_SOFT was set in flags). cpumap_hard contains the + * intersection between hard affinity and the cpupool's online CPUs (if + * XEN_VCPUAFFINITY_HARD was set in flags). + * + * Both are OUT-only for XEN_DOMCTL_getvcpuaffinity, in which case they + * contain the plain hard and/or soft affinity masks that were set during + * previous successful calls to XEN_DOMCTL_setvcpuaffinity (or the + * default values), without intersecting or altering them in any way. 
+ */ + struct xenctl_bitmap cpumap_hard; + struct xenctl_bitmap cpumap_soft; }; typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); /* XEN_DOMCTL_max_vcpus */ struct xen_domctl_max_vcpus { uint32_t max; /* maximum number of vcpus */ }; typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); /* XEN_DOMCTL_scheduler_op */ /* Scheduler types. */ -#define XEN_SCHEDULER_SEDF 4 +/* #define XEN_SCHEDULER_SEDF 4 (Removed) */ #define XEN_SCHEDULER_CREDIT 5 #define XEN_SCHEDULER_CREDIT2 6 #define XEN_SCHEDULER_ARINC653 7 +#define XEN_SCHEDULER_RTDS 8 + /* Set or get info? */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 struct xen_domctl_scheduler_op { uint32_t sched_id; /* XEN_SCHEDULER_* */ uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ union { - struct xen_domctl_sched_sedf { - uint64_aligned_t period; - uint64_aligned_t slice; - uint64_aligned_t latency; - uint32_t extratime; - uint32_t weight; - } sedf; struct xen_domctl_sched_credit { uint16_t weight; uint16_t cap; } credit; struct xen_domctl_sched_credit2 { uint16_t weight; } credit2; + struct xen_domctl_sched_rtds { + uint32_t period; + uint32_t budget; + } rtds; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); /* XEN_DOMCTL_setdomainhandle */ struct xen_domctl_setdomainhandle { xen_domain_handle_t handle; }; typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); /* XEN_DOMCTL_setdebugging */ struct xen_domctl_setdebugging { uint8_t enable; }; typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); /* XEN_DOMCTL_irq_permission */ struct xen_domctl_irq_permission { uint8_t pirq; uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ }; typedef struct 
xen_domctl_irq_permission xen_domctl_irq_permission_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); /* XEN_DOMCTL_iomem_permission */ struct xen_domctl_iomem_permission { uint64_aligned_t first_mfn;/* first page (physical page number) in range */ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ }; typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); /* XEN_DOMCTL_ioport_permission */ struct xen_domctl_ioport_permission { uint32_t first_port; /* first port int range */ uint32_t nr_ports; /* size of port range */ uint8_t allow_access; /* allow or deny access to range? */ }; typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); /* XEN_DOMCTL_hypercall_init */ struct xen_domctl_hypercall_init { uint64_aligned_t gmfn; /* GMFN to be initialised */ }; typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); -/* XEN_DOMCTL_arch_setup */ -#define _XEN_DOMAINSETUP_hvm_guest 0 -#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) -#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ -#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) -#define _XEN_DOMAINSETUP_sioemu_guest 2 -#define XEN_DOMAINSETUP_sioemu_guest (1UL<<_XEN_DOMAINSETUP_sioemu_guest) -typedef struct xen_domctl_arch_setup { - uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */ -#ifdef __ia64__ - uint64_aligned_t bp; /* mpaddr of boot param area */ - uint64_aligned_t maxmem; /* Highest memory address for MDT. */ - uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */ - uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */ - int8_t vhpt_size_log2; /* Log2 of VHPT size. 
*/ -#endif -} xen_domctl_arch_setup_t; -DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); - - /* XEN_DOMCTL_settimeoffset */ struct xen_domctl_settimeoffset { - int32_t time_offset_seconds; /* applied to domain wallclock time */ + int64_aligned_t time_offset_seconds; /* applied to domain wallclock time */ }; typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); /* XEN_DOMCTL_gethvmcontext */ /* XEN_DOMCTL_sethvmcontext */ typedef struct xen_domctl_hvmcontext { uint32_t size; /* IN/OUT: size of buffer / bytes filled */ XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call * gethvmcontext with NULL * buffer to get size req'd */ } xen_domctl_hvmcontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); /* XEN_DOMCTL_set_address_size */ /* XEN_DOMCTL_get_address_size */ typedef struct xen_domctl_address_size { uint32_t size; } xen_domctl_address_size_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); -/* XEN_DOMCTL_real_mode_area */ -struct xen_domctl_real_mode_area { - uint32_t log; /* log2 of Real Mode Area size */ -}; -typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; -DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); - - /* XEN_DOMCTL_sendtrigger */ #define XEN_DOMCTL_SENDTRIGGER_NMI 0 #define XEN_DOMCTL_SENDTRIGGER_RESET 1 #define XEN_DOMCTL_SENDTRIGGER_INIT 2 #define XEN_DOMCTL_SENDTRIGGER_POWER 3 #define XEN_DOMCTL_SENDTRIGGER_SLEEP 4 struct xen_domctl_sendtrigger { uint32_t trigger; /* IN */ uint32_t vcpu; /* IN */ }; typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); -/* Assign PCI device to HVM guest. Sets up IOMMU structures. */ +/* Assign a device to a guest. Sets up IOMMU structures. 
*/ /* XEN_DOMCTL_assign_device */ /* XEN_DOMCTL_test_assign_device */ -/* XEN_DOMCTL_deassign_device */ +/* + * XEN_DOMCTL_deassign_device: The behavior of this DOMCTL differs + * between the different types of device: + * - PCI device (XEN_DOMCTL_DEV_PCI) will be reassigned to DOM0 + * - DT device (XEN_DOMCTL_DEV_DT) will be left unassigned. DOM0 + * will have to call XEN_DOMCTL_assign_device in order to use the + * device. + */ +#define XEN_DOMCTL_DEV_PCI 0 +#define XEN_DOMCTL_DEV_DT 1 struct xen_domctl_assign_device { - uint32_t machine_sbdf; /* machine PCI ID of assigned device */ + uint32_t dev; /* XEN_DOMCTL_DEV_* */ + union { + struct { + uint32_t machine_sbdf; /* machine PCI ID of assigned device */ + } pci; + struct { + uint32_t size; /* Length of the path */ + XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */ + } dt; + } u; + /* IN */ +#define XEN_DOMCTL_DEV_RDM_RELAXED 1 + uint32_t flag; /* flag of assigned device */ }; typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); /* Retrieve sibling devices infomation of machine_sbdf */ /* XEN_DOMCTL_get_device_group */ struct xen_domctl_get_device_group { uint32_t machine_sbdf; /* IN */ uint32_t max_sdevs; /* IN */ uint32_t num_sdevs; /* OUT */ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ }; typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); /* Pass-through interrupts: bind real irq -> hvm devfn.
*/ /* XEN_DOMCTL_bind_pt_irq */ /* XEN_DOMCTL_unbind_pt_irq */ typedef enum pt_irq_type_e { PT_IRQ_TYPE_PCI, PT_IRQ_TYPE_ISA, PT_IRQ_TYPE_MSI, PT_IRQ_TYPE_MSI_TRANSLATE, + PT_IRQ_TYPE_SPI, /* ARM: valid range 32-1019 */ } pt_irq_type_t; struct xen_domctl_bind_pt_irq { uint32_t machine_irq; pt_irq_type_t irq_type; uint32_t hvm_domid; union { struct { uint8_t isa_irq; } isa; struct { uint8_t bus; uint8_t device; uint8_t intx; } pci; struct { uint8_t gvec; uint32_t gflags; uint64_aligned_t gtable; } msi; + struct { + uint16_t spi; + } spi; } u; }; typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); /* Bind machine I/O address range -> HVM address range. */ +/* If this returns -E2BIG lower nr_mfns value. */ /* XEN_DOMCTL_memory_mapping */ #define DPCI_ADD_MAPPING 1 #define DPCI_REMOVE_MAPPING 0 struct xen_domctl_memory_mapping { uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ uint64_aligned_t first_mfn; /* first page (machine page) in range */ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ uint32_t add_mapping; /* add or remove mapping */ uint32_t padding; /* padding for 64-bit aligned structure */ }; typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); /* Bind machine I/O port range -> HVM I/O port range. */ /* XEN_DOMCTL_ioport_mapping */ struct xen_domctl_ioport_mapping { uint32_t first_gport; /* first guest IO port*/ uint32_t first_mport; /* first machine IO port */ uint32_t nr_ports; /* size of port range */ uint32_t add_mapping; /* add or remove mapping */ }; typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); /* * Pin caching type of RAM space for x86 HVM domU. */ /* XEN_DOMCTL_pin_mem_cacheattr */ /* Caching types: these happen to be the same as x86 MTRR/PAT type codes. 
*/ #define XEN_DOMCTL_MEM_CACHEATTR_UC 0 #define XEN_DOMCTL_MEM_CACHEATTR_WC 1 #define XEN_DOMCTL_MEM_CACHEATTR_WT 4 #define XEN_DOMCTL_MEM_CACHEATTR_WP 5 #define XEN_DOMCTL_MEM_CACHEATTR_WB 6 #define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 +#define XEN_DOMCTL_DELETE_MEM_CACHEATTR (~(uint32_t)0) struct xen_domctl_pin_mem_cacheattr { uint64_aligned_t start, end; uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ }; typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); /* XEN_DOMCTL_set_ext_vcpucontext */ /* XEN_DOMCTL_get_ext_vcpucontext */ struct xen_domctl_ext_vcpucontext { /* IN: VCPU that this call applies to. */ uint32_t vcpu; /* * SET: Size of struct (IN) * GET: Size of struct (OUT, up to 128 bytes) */ uint32_t size; #if defined(__i386__) || defined(__x86_64__) /* SYSCALL from 32-bit mode and SYSENTER callback information. */ /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ uint64_aligned_t syscall32_callback_eip; uint64_aligned_t sysenter_callback_eip; uint16_t syscall32_callback_cs; uint16_t sysenter_callback_cs; uint8_t syscall32_disables_events; uint8_t sysenter_disables_events; - uint64_aligned_t mcg_cap; +#if defined(__GNUC__) + union { + uint64_aligned_t mcg_cap; + struct hvm_vmce_vcpu vmce; + }; +#else + struct hvm_vmce_vcpu vmce; #endif +#endif }; typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); /* - * Set optimizaton features for a domain - */ -/* XEN_DOMCTL_set_opt_feature */ -struct xen_domctl_set_opt_feature { -#if defined(__ia64__) - struct xen_ia64_opt_feature optf; -#else - /* Make struct non-empty: do not depend on this field name! 
*/ - uint64_t dummy; -#endif -}; -typedef struct xen_domctl_set_opt_feature xen_domctl_set_opt_feature_t; -DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_opt_feature_t); - -/* * Set the target domain for a domain */ /* XEN_DOMCTL_set_target */ struct xen_domctl_set_target { domid_t target; }; typedef struct xen_domctl_set_target xen_domctl_set_target_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t); #if defined(__i386__) || defined(__x86_64__) # define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF /* XEN_DOMCTL_set_cpuid */ struct xen_domctl_cpuid { uint32_t input[2]; uint32_t eax; uint32_t ebx; uint32_t ecx; uint32_t edx; }; typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); #endif +/* + * Arranges that if the domain suspends (specifically, if it shuts + * down with code SHUTDOWN_suspend), this event channel will be + * notified. + * + * This is _instead of_ the usual notification to the global + * VIRQ_DOM_EXC. (In most systems that pirq is owned by xenstored.) + * + * Only one subscription per domain is possible. Last subscriber + * wins; others are silently displaced. + * + * NB that contrary to the rather general name, it only applies to + * domain shutdown with code suspend. Shutdown for other reasons + * (including crash), and domain death, are notified to VIRQ_DOM_EXC + * regardless. + */ /* XEN_DOMCTL_subscribe */ struct xen_domctl_subscribe { uint32_t port; /* IN */ }; typedef struct xen_domctl_subscribe xen_domctl_subscribe_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t); /* * Define the maximum machine address size which should be allocated * to a guest. */ /* XEN_DOMCTL_set_machine_address_size */ /* XEN_DOMCTL_get_machine_address_size */ /* * Do not inject spurious page faults into this domain. 
*/ /* XEN_DOMCTL_suppress_spurious_page_faults */ /* XEN_DOMCTL_debug_op */ #define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0 #define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1 struct xen_domctl_debug_op { uint32_t op; /* IN */ uint32_t vcpu; /* IN */ }; typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); /* * Request a particular record from the HVM context */ /* XEN_DOMCTL_gethvmcontext_partial */ typedef struct xen_domctl_hvmcontext_partial { uint32_t type; /* IN: Type of record required */ uint32_t instance; /* IN: Instance of that type */ XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ } xen_domctl_hvmcontext_partial_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); /* XEN_DOMCTL_disable_migrate */ typedef struct xen_domctl_disable_migrate { uint32_t disable; /* IN: 1: disable migration and restore */ } xen_domctl_disable_migrate_t; /* XEN_DOMCTL_gettscinfo */ /* XEN_DOMCTL_settscinfo */ -struct xen_guest_tsc_info { +typedef struct xen_domctl_tsc_info { + /* IN/OUT */ uint32_t tsc_mode; uint32_t gtsc_khz; uint32_t incarnation; uint32_t pad; uint64_aligned_t elapsed_nsec; -}; -typedef struct xen_guest_tsc_info xen_guest_tsc_info_t; -DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t); -typedef struct xen_domctl_tsc_info { - XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */ - xen_guest_tsc_info_t info; /* IN */ } xen_domctl_tsc_info_t; /* XEN_DOMCTL_gdbsx_guestmemio guest mem io */ struct xen_domctl_gdbsx_memio { /* IN */ uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */ uint64_aligned_t gva; /* guest virtual address */ uint64_aligned_t uva; /* user buffer virtual address */ uint32_t len; /* number of bytes to read/write */ uint8_t gwr; /* 0 = read from guest. 
1 = write to guest */ /* OUT */ uint32_t remain; /* bytes remaining to be copied */ }; /* XEN_DOMCTL_gdbsx_pausevcpu */ /* XEN_DOMCTL_gdbsx_unpausevcpu */ struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */ uint32_t vcpu; /* which vcpu */ }; /* XEN_DOMCTL_gdbsx_domstatus */ struct xen_domctl_gdbsx_domstatus { /* OUT */ uint8_t paused; /* is the domain paused */ uint32_t vcpu_id; /* any vcpu in an event? */ uint32_t vcpu_ev; /* if yes, what event? */ }; /* - * Memory event operations + * VM event operations */ -/* XEN_DOMCTL_mem_event_op */ +/* XEN_DOMCTL_vm_event_op */ /* + * There are currently three rings available for VM events: + * sharing, monitor and paging. This hypercall allows one to + * control these rings (enable/disable), as well as to signal + * to the hypervisor to pull responses (resume) from the given + * ring. + */ +#define XEN_VM_EVENT_ENABLE 0 +#define XEN_VM_EVENT_DISABLE 1 +#define XEN_VM_EVENT_RESUME 2 + +/* * Domain memory paging * Page memory in and out. * Domctl interface to set up and tear down the * pager<->hypervisor interface. Use XENMEM_paging_op* * to perform per-page operations. * - * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several + * The XEN_VM_EVENT_PAGING_ENABLE domctl returns several * non-standard error codes to indicate why paging could not be enabled: * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest * EMLINK - guest has iommu passthrough enabled * EXDEV - guest has PoD enabled * EBUSY - guest has or had paging enabled, ring buffer still active */ -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1 +#define XEN_DOMCTL_VM_EVENT_OP_PAGING 1 -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0 -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1 - /* - * Access permissions. + * Monitor helper. * * As with paging, use the domctl for teardown/setup of the * helper<->hypervisor interface. * - * There are HVM hypercalls to set the per-page access permissions of every - * page in a domain. 
When one of these permissions--independent, read, - * write, and execute--is violated, the VCPU is paused and a memory event - * is sent with what happened. (See public/mem_event.h) . + * The monitor interface can be used to register for various VM events. For + * example, there are HVM hypercalls to set the per-page access permissions + * of every page in a domain. When one of these permissions--independent, + * read, write, and execute--is violated, the VCPU is paused and a memory event + * is sent with what happened. The memory event handler can then resume the + * VCPU and redo the access with a XEN_VM_EVENT_RESUME option. * - * The memory event handler can then resume the VCPU and redo the access - * with a XENMEM_access_op_resume hypercall. + * See public/vm_event.h for the list of available events that can be + * subscribed to via the monitor interface. * - * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several + * The XEN_VM_EVENT_MONITOR_* domctls return * non-standard error codes to indicate why access could not be enabled: * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest * EBUSY - guest has or had access enabled, ring buffer still active + * */ -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 +#define XEN_DOMCTL_VM_EVENT_OP_MONITOR 2 -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 - /* * Sharing ENOMEM helper. * * As with paging, use the domctl for teardown/setup of the * helper<->hypervisor interface. * * If setup, this ring is used to communicate failed allocations * in the unshare path. XENMEM_sharing_op_resume is used to wake up * vcpus that could not unshare. * * Note that shring can be turned on (as per the domctl below) * *without* this ring being setup.
*/ -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3 +#define XEN_DOMCTL_VM_EVENT_OP_SHARING 3 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1 - /* Use for teardown/setup of helper<->hypervisor interface for paging, * access and sharing.*/ -struct xen_domctl_mem_event_op { - uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ - uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ +struct xen_domctl_vm_event_op { + uint32_t op; /* XEN_VM_EVENT_* */ + uint32_t mode; /* XEN_DOMCTL_VM_EVENT_OP_* */ uint32_t port; /* OUT: event channel for ring */ }; -typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; -DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); +typedef struct xen_domctl_vm_event_op xen_domctl_vm_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vm_event_op_t); /* * Memory sharing operations */ /* XEN_DOMCTL_mem_sharing_op. * The CONTROL sub-domctl is used for bringup/teardown. */ #define XEN_DOMCTL_MEM_SHARING_CONTROL 0 struct xen_domctl_mem_sharing_op { uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */ union { uint8_t enable; /* CONTROL */ } u; }; typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t); struct xen_domctl_audit_p2m { /* OUT error counts */ uint64_t orphans; uint64_t m2p_bad; uint64_t p2m_bad; }; typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t); struct xen_domctl_set_virq_handler { uint32_t virq; /* IN */ }; typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t); #if defined(__i386__) || defined(__x86_64__) /* XEN_DOMCTL_setvcpuextstate */ /* XEN_DOMCTL_getvcpuextstate */ struct xen_domctl_vcpuextstate { /* IN: VCPU that this call applies to. */ uint32_t vcpu; /* - * SET: xfeature support mask of struct (IN) + * SET: Ignored. 
* GET: xfeature support mask of struct (IN/OUT) * xfeature mask is served as identifications of the saving format * so that compatible CPUs can have a check on format to decide * whether it can restore. */ uint64_aligned_t xfeature_mask; /* * SET: Size of struct (IN) * GET: Size of struct (IN/OUT) */ uint64_aligned_t size; XEN_GUEST_HANDLE_64(uint64) buffer; }; typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t); #endif /* XEN_DOMCTL_set_access_required: sets whether a memory event listener * must be present to handle page access events: if false, the page * access will revert to full permissions if no one is listening; * */ struct xen_domctl_set_access_required { uint8_t access_required; }; typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); +struct xen_domctl_set_broken_page_p2m { + uint64_aligned_t pfn; +}; +typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); + +/* + * XEN_DOMCTL_set_max_evtchn: sets the maximum event channel port + * number the guest may use. Use this to limit the amount of resources + * (global mapping space, xenheap) a guest may use for event channels. + */ +struct xen_domctl_set_max_evtchn { + uint32_t max_port; +}; +typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t); + +/* + * ARM: Clean and invalidate caches associated with given region of + * guest memory. + */ +struct xen_domctl_cacheflush { + /* IN: page range to flush.
*/ + xen_pfn_t start_pfn, nr_pfns; +}; +typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t); + +#if defined(__i386__) || defined(__x86_64__) +struct xen_domctl_vcpu_msr { + uint32_t index; + uint32_t reserved; + uint64_aligned_t value; +}; +typedef struct xen_domctl_vcpu_msr xen_domctl_vcpu_msr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msr_t); + +/* + * XEN_DOMCTL_set_vcpu_msrs / XEN_DOMCTL_get_vcpu_msrs. + * + * Input: + * - A NULL 'msrs' guest handle is a request for the maximum 'msr_count'. + * - Otherwise, 'msr_count' is the number of entries in 'msrs'. + * + * Output for get: + * - If 'msr_count' is less than the number Xen needs to write, -ENOBUFS shall + * be returned and 'msr_count' updated to reflect the intended number. + * - On success, 'msr_count' shall indicate the number of MSRs written, which + * may be less than the maximum if some are not currently used by the vcpu. + * + * Output for set: + * - If Xen encounters an error with a specific MSR, -EINVAL shall be returned + * and 'msr_count' shall be set to the offending index, to aid debugging. + */ +struct xen_domctl_vcpu_msrs { + uint32_t vcpu; /* IN */ + uint32_t msr_count; /* IN/OUT */ + XEN_GUEST_HANDLE_64(xen_domctl_vcpu_msr_t) msrs; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t); +#endif + +/* XEN_DOMCTL_setvnumainfo: specifies a virtual NUMA topology for the guest */ +struct xen_domctl_vnuma { + /* IN: number of vNUMA nodes to setup. Shall be greater than 0 */ + uint32_t nr_vnodes; + /* IN: number of memory ranges to setup */ + uint32_t nr_vmemranges; + /* + * IN: number of vCPUs of the domain (used as size of the vcpu_to_vnode + * array declared below). Shall be equal to the domain's max_vcpus. + */ + uint32_t nr_vcpus; + uint32_t pad; /* must be zero */ + + /* + * IN: array for specifying the distances of the vNUMA nodes + * between each others. 
Shall have nr_vnodes*nr_vnodes elements. + */ + XEN_GUEST_HANDLE_64(uint) vdistance; + /* + * IN: array for specifying to what vNUMA node each vCPU belongs. + * Shall have nr_vcpus elements. + */ + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode; + /* + * IN: array for specifying on what physical NUMA node each vNUMA + * node is placed. Shall have nr_vnodes elements. + */ + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode; + /* + * IN: array for specifying the memory ranges. Shall have + * nr_vmemranges elements. + */ + XEN_GUEST_HANDLE_64(xen_vmemrange_t) vmemrange; +}; +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t); + +struct xen_domctl_psr_cmt_op { +#define XEN_DOMCTL_PSR_CMT_OP_DETACH 0 +#define XEN_DOMCTL_PSR_CMT_OP_ATTACH 1 +#define XEN_DOMCTL_PSR_CMT_OP_QUERY_RMID 2 + uint32_t cmd; + uint32_t data; +}; +typedef struct xen_domctl_psr_cmt_op xen_domctl_psr_cmt_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t); + +/* XEN_DOMCTL_MONITOR_* + * + * Enable/disable monitoring various VM events. + * This domctl configures what events will be reported to helper apps + * via the ring buffer "MONITOR". The ring has to be first enabled + * with the domctl XEN_DOMCTL_VM_EVENT_OP_MONITOR. + * + * GET_CAPABILITIES can be used to determine which of these features is + * available on a given platform. + * + * NOTICE: mem_access events are also delivered via the "MONITOR" ring buffer; + * however, enabling/disabling those events is performed with the use of + * memory_op hypercalls! 
+ */ +#define XEN_DOMCTL_MONITOR_OP_ENABLE 0 +#define XEN_DOMCTL_MONITOR_OP_DISABLE 1 +#define XEN_DOMCTL_MONITOR_OP_GET_CAPABILITIES 2 + +#define XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG 0 +#define XEN_DOMCTL_MONITOR_EVENT_MOV_TO_MSR 1 +#define XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP 2 +#define XEN_DOMCTL_MONITOR_EVENT_SOFTWARE_BREAKPOINT 3 +#define XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST 4 + +struct xen_domctl_monitor_op { + uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */ + + /* + * When used with ENABLE/DISABLE this has to be set to + * the requested XEN_DOMCTL_MONITOR_EVENT_* value. + * With GET_CAPABILITIES this field returns a bitmap of + * events supported by the platform, in the format + * (1 << XEN_DOMCTL_MONITOR_EVENT_*). + */ + uint32_t event; + + /* + * Further options when issuing XEN_DOMCTL_MONITOR_OP_ENABLE. + */ + union { + struct { + /* Which control register */ + uint8_t index; + /* Pause vCPU until response */ + uint8_t sync; + /* Send event only on a change of value */ + uint8_t onchangeonly; + } mov_to_cr; + + struct { + /* Enable the capture of an extended set of MSRs */ + uint8_t extended_capture; + } mov_to_msr; + + struct { + /* Pause vCPU until response */ + uint8_t sync; + } guest_request; + } u; +}; +typedef struct xen_domctl_monitor_op xen_domctl_monitor_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_monitor_op_t); + +struct xen_domctl_psr_cat_op { +#define XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM 0 +#define XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM 1 + uint32_t cmd; /* IN: XEN_DOMCTL_PSR_CAT_OP_* */ + uint32_t target; /* IN */ + uint64_t data; /* IN/OUT */ +}; +typedef struct xen_domctl_psr_cat_op xen_domctl_psr_cat_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cat_op_t); + struct xen_domctl { uint32_t cmd; #define XEN_DOMCTL_createdomain 1 #define XEN_DOMCTL_destroydomain 2 #define XEN_DOMCTL_pausedomain 3 #define XEN_DOMCTL_unpausedomain 4 #define XEN_DOMCTL_getdomaininfo 5 #define XEN_DOMCTL_getmemlist 6 -#define XEN_DOMCTL_getpageframeinfo 7 -#define 
XEN_DOMCTL_getpageframeinfo2 8 +/* #define XEN_DOMCTL_getpageframeinfo 7 Obsolete - use getpageframeinfo3 */ +/* #define XEN_DOMCTL_getpageframeinfo2 8 Obsolete - use getpageframeinfo3 */ #define XEN_DOMCTL_setvcpuaffinity 9 #define XEN_DOMCTL_shadow_op 10 #define XEN_DOMCTL_max_mem 11 #define XEN_DOMCTL_setvcpucontext 12 #define XEN_DOMCTL_getvcpucontext 13 #define XEN_DOMCTL_getvcpuinfo 14 #define XEN_DOMCTL_max_vcpus 15 #define XEN_DOMCTL_scheduler_op 16 #define XEN_DOMCTL_setdomainhandle 17 #define XEN_DOMCTL_setdebugging 18 #define XEN_DOMCTL_irq_permission 19 #define XEN_DOMCTL_iomem_permission 20 #define XEN_DOMCTL_ioport_permission 21 #define XEN_DOMCTL_hypercall_init 22 -#define XEN_DOMCTL_arch_setup 23 +#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */ #define XEN_DOMCTL_settimeoffset 24 #define XEN_DOMCTL_getvcpuaffinity 25 -#define XEN_DOMCTL_real_mode_area 26 +#define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */ #define XEN_DOMCTL_resumedomain 27 #define XEN_DOMCTL_sendtrigger 28 #define XEN_DOMCTL_subscribe 29 #define XEN_DOMCTL_gethvmcontext 33 #define XEN_DOMCTL_sethvmcontext 34 #define XEN_DOMCTL_set_address_size 35 #define XEN_DOMCTL_get_address_size 36 #define XEN_DOMCTL_assign_device 37 #define XEN_DOMCTL_bind_pt_irq 38 #define XEN_DOMCTL_memory_mapping 39 #define XEN_DOMCTL_ioport_mapping 40 #define XEN_DOMCTL_pin_mem_cacheattr 41 #define XEN_DOMCTL_set_ext_vcpucontext 42 #define XEN_DOMCTL_get_ext_vcpucontext 43 -#define XEN_DOMCTL_set_opt_feature 44 +#define XEN_DOMCTL_set_opt_feature 44 /* Obsolete IA64 only */ #define XEN_DOMCTL_test_assign_device 45 #define XEN_DOMCTL_set_target 46 #define XEN_DOMCTL_deassign_device 47 #define XEN_DOMCTL_unbind_pt_irq 48 #define XEN_DOMCTL_set_cpuid 49 #define XEN_DOMCTL_get_device_group 50 #define XEN_DOMCTL_set_machine_address_size 51 #define XEN_DOMCTL_get_machine_address_size 52 #define XEN_DOMCTL_suppress_spurious_page_faults 53 #define XEN_DOMCTL_debug_op 54 #define 
XEN_DOMCTL_gethvmcontext_partial 55 -#define XEN_DOMCTL_mem_event_op 56 +#define XEN_DOMCTL_vm_event_op 56 #define XEN_DOMCTL_mem_sharing_op 57 #define XEN_DOMCTL_disable_migrate 58 #define XEN_DOMCTL_gettscinfo 59 #define XEN_DOMCTL_settscinfo 60 #define XEN_DOMCTL_getpageframeinfo3 61 #define XEN_DOMCTL_setvcpuextstate 62 #define XEN_DOMCTL_getvcpuextstate 63 #define XEN_DOMCTL_set_access_required 64 #define XEN_DOMCTL_audit_p2m 65 #define XEN_DOMCTL_set_virq_handler 66 +#define XEN_DOMCTL_set_broken_page_p2m 67 +#define XEN_DOMCTL_setnodeaffinity 68 +#define XEN_DOMCTL_getnodeaffinity 69 +#define XEN_DOMCTL_set_max_evtchn 70 +#define XEN_DOMCTL_cacheflush 71 +#define XEN_DOMCTL_get_vcpu_msrs 72 +#define XEN_DOMCTL_set_vcpu_msrs 73 +#define XEN_DOMCTL_setvnumainfo 74 +#define XEN_DOMCTL_psr_cmt_op 75 +#define XEN_DOMCTL_monitor_op 77 +#define XEN_DOMCTL_psr_cat_op 78 #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 #define XEN_DOMCTL_gdbsx_domstatus 1003 uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ domid_t domain; union { struct xen_domctl_createdomain createdomain; struct xen_domctl_getdomaininfo getdomaininfo; struct xen_domctl_getmemlist getmemlist; - struct xen_domctl_getpageframeinfo getpageframeinfo; - struct xen_domctl_getpageframeinfo2 getpageframeinfo2; struct xen_domctl_getpageframeinfo3 getpageframeinfo3; + struct xen_domctl_nodeaffinity nodeaffinity; struct xen_domctl_vcpuaffinity vcpuaffinity; struct xen_domctl_shadow_op shadow_op; struct xen_domctl_max_mem max_mem; struct xen_domctl_vcpucontext vcpucontext; struct xen_domctl_getvcpuinfo getvcpuinfo; struct xen_domctl_max_vcpus max_vcpus; struct xen_domctl_scheduler_op scheduler_op; struct xen_domctl_setdomainhandle setdomainhandle; struct xen_domctl_setdebugging setdebugging; struct xen_domctl_irq_permission irq_permission; struct xen_domctl_iomem_permission iomem_permission; struct 
xen_domctl_ioport_permission ioport_permission; struct xen_domctl_hypercall_init hypercall_init; - struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_disable_migrate disable_migrate; struct xen_domctl_tsc_info tsc_info; - struct xen_domctl_real_mode_area real_mode_area; struct xen_domctl_hvmcontext hvmcontext; struct xen_domctl_hvmcontext_partial hvmcontext_partial; struct xen_domctl_address_size address_size; struct xen_domctl_sendtrigger sendtrigger; struct xen_domctl_get_device_group get_device_group; struct xen_domctl_assign_device assign_device; struct xen_domctl_bind_pt_irq bind_pt_irq; struct xen_domctl_memory_mapping memory_mapping; struct xen_domctl_ioport_mapping ioport_mapping; struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; struct xen_domctl_ext_vcpucontext ext_vcpucontext; - struct xen_domctl_set_opt_feature set_opt_feature; struct xen_domctl_set_target set_target; struct xen_domctl_subscribe subscribe; struct xen_domctl_debug_op debug_op; - struct xen_domctl_mem_event_op mem_event_op; + struct xen_domctl_vm_event_op vm_event_op; struct xen_domctl_mem_sharing_op mem_sharing_op; #if defined(__i386__) || defined(__x86_64__) struct xen_domctl_cpuid cpuid; struct xen_domctl_vcpuextstate vcpuextstate; + struct xen_domctl_vcpu_msrs vcpu_msrs; #endif struct xen_domctl_set_access_required access_required; struct xen_domctl_audit_p2m audit_p2m; struct xen_domctl_set_virq_handler set_virq_handler; + struct xen_domctl_set_max_evtchn set_max_evtchn; struct xen_domctl_gdbsx_memio gdbsx_guest_memio; + struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; + struct xen_domctl_cacheflush cacheflush; struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; + struct xen_domctl_vnuma vnuma; + struct xen_domctl_psr_cmt_op psr_cmt_op; + struct xen_domctl_monitor_op monitor_op; + struct xen_domctl_psr_cat_op psr_cat_op; uint8_t pad[128]; } u; }; typedef 
struct xen_domctl xen_domctl_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); #endif /* __XEN_PUBLIC_DOMCTL_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/elfnote.h =================================================================== --- projects/clang370-import/sys/xen/interface/elfnote.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/elfnote.h (revision 288926) @@ -1,263 +1,271 @@ /****************************************************************************** * elfnote.h * * Definitions used for the Xen ELF notes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2006, Ian Campbell, XenSource Ltd. */ #ifndef __XEN_PUBLIC_ELFNOTE_H__ #define __XEN_PUBLIC_ELFNOTE_H__ /* + * `incontents 200 elfnotes ELF notes + * * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. 
* * Numeric types are either 4 or 8 bytes depending on the content of * the desc field. * * LEGACY indicated the fields in the legacy __xen_guest string which * this a note type replaces. + * + * String values (for non-legacy) are NULL terminated ASCII, also known + * as ASCIZ type. */ /* * NAME=VALUE pair (string). */ #define XEN_ELFNOTE_INFO 0 /* * The virtual address of the entry point (numeric). * * LEGACY: VIRT_ENTRY */ #define XEN_ELFNOTE_ENTRY 1 /* The virtual address of the hypercall transfer page (numeric). * * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page * number not a virtual address) */ #define XEN_ELFNOTE_HYPERCALL_PAGE 2 /* The virtual address where the kernel image should be mapped (numeric). * * Defaults to 0. * * LEGACY: VIRT_BASE */ #define XEN_ELFNOTE_VIRT_BASE 3 /* - * The offset of the ELF paddr field from the acutal required - * psuedo-physical address (numeric). + * The offset of the ELF paddr field from the actual required + * pseudo-physical address (numeric). * * This is used to maintain backwards compatibility with older kernels * which wrote __PAGE_OFFSET into that field. This field defaults to 0 * if not present. * * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) */ #define XEN_ELFNOTE_PADDR_OFFSET 4 /* * The version of Xen that we work with (string). * * LEGACY: XEN_VER */ #define XEN_ELFNOTE_XEN_VERSION 5 /* * The name of the guest operating system (string). * * LEGACY: GUEST_OS */ #define XEN_ELFNOTE_GUEST_OS 6 /* * The version of the guest operating system (string). * * LEGACY: GUEST_VER */ #define XEN_ELFNOTE_GUEST_VERSION 7 /* * The loader type (string). * * LEGACY: LOADER */ #define XEN_ELFNOTE_LOADER 8 /* * The kernel supports PAE (x86/32 only, string = "yes", "no" or * "bimodal"). * * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting * may be given as "yes,bimodal" which will cause older Xen to treat * this kernel as PAE. * * LEGACY: PAE (n.b. 
The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be * placed above 4G. It is assumed that any kernel new enough to use * these ELF notes will include this and therefore "yes" here is * equivalent to "yes[entended-cr3]" in the __xen_guest interface. */ #define XEN_ELFNOTE_PAE_MODE 9 /* * The features supported/required by this kernel (string). * * The string must consist of a list of feature names (as given in * features.h, without the "XENFEAT_" prefix) separated by '|' * characters. If a feature is required for the kernel to function * then the feature name must be preceded by a '!' character. * * LEGACY: FEATURES */ #define XEN_ELFNOTE_FEATURES 10 /* * The kernel requires the symbol table to be loaded (string = "yes" or "no") * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence * of this string as a boolean flag rather than requiring "yes" or * "no". */ #define XEN_ELFNOTE_BSD_SYMTAB 11 /* * The lowest address the hypervisor hole can begin at (numeric). * * This must not be set higher than HYPERVISOR_VIRT_START. Its presence * also indicates to the hypervisor that the kernel can deal with the * hole starting at a higher address. */ #define XEN_ELFNOTE_HV_START_LOW 12 /* * List of maddr_t-sized mask/value pairs describing how to recognize * (non-present) L1 page table entries carrying valid MFNs (numeric). */ #define XEN_ELFNOTE_L1_MFN_VALID 13 /* * Whether or not the guest supports cooperative suspend cancellation. + * This is a numeric value. + * + * Default is 0 */ #define XEN_ELFNOTE_SUSPEND_CANCEL 14 /* * The (non-default) location the initial phys-to-machine map should be * placed at by the hypervisor (Dom0) or the tools (DomU). * The kernel must be prepared for this mapping to be established using * large pages, despite such otherwise not being available to guests. 
* The kernel must also be able to handle the page table pages used for * this mapping not being accessible through the initial mapping. * (Only x86-64 supports this at present.) */ #define XEN_ELFNOTE_INIT_P2M 15 /* * Whether or not the guest can deal with being passed an initrd not * mapped through its initial page tables. */ #define XEN_ELFNOTE_MOD_START_PFN 16 /* * The features supported by this kernel (numeric). * * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a * kernel to specify support for features that older hypervisors don't * know about. The set of features 4.2 and newer hypervisors will * consider supported by the kernel is the combination of the sets * specified through this and the string note. * * LEGACY: FEATURES */ #define XEN_ELFNOTE_SUPPORTED_FEATURES 17 /* * The number of the highest elfnote defined. */ #define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES /* * System information exported through crash notes. * * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO * note in case of a system crash. This note will contain various * information about the system, see xen/include/xen/elfcore.h. */ #define XEN_ELFNOTE_CRASH_INFO 0x1000001 /* * System registers exported through crash notes. * * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS * note per cpu in case of a system crash. This note is architecture * specific and will contain registers not saved in the "CORE" note. * See xen/include/xen/elfcore.h for more information. */ #define XEN_ELFNOTE_CRASH_REGS 0x1000002 /* * xen dump-core none note. * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE * in its dump file to indicate that the file is xen dump-core * file. This note doesn't have any other information. * See tools/libxc/xc_core.h for more information. */ #define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 /* * xen dump-core header note. * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER * in its dump file. 
* See tools/libxc/xc_core.h for more information. */ #define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 /* * xen dump-core xen version note. * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION * in its dump file. It contains the xen version obtained via the * XENVER hypercall. * See tools/libxc/xc_core.h for more information. */ #define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 /* * xen dump-core format version note. * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION * in its dump file. It contains a format version identifier. * See tools/libxc/xc_core.h for more information. */ #define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/errno.h =================================================================== --- projects/clang370-import/sys/xen/interface/errno.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/errno.h (revision 288926) @@ -0,0 +1,95 @@ +#ifndef __XEN_PUBLIC_ERRNO_H__ + +#ifndef __ASSEMBLY__ + +#define XEN_ERRNO(name, value) XEN_##name = value, +enum xen_errno { + +#else /* !__ASSEMBLY__ */ + +#define XEN_ERRNO(name, value) .equ XEN_##name, value + +#endif /* __ASSEMBLY__ */ + +/* ` enum neg_errnoval { [ -Efoo for each Efoo in the list below ] } */ +/* ` enum errnoval { */ + +#endif /* __XEN_PUBLIC_ERRNO_H__ */ + +#ifdef XEN_ERRNO + +/* + * Values originating from x86 Linux. Please consider using respective + * values when adding new definitions here. + * + * The set of identifiers to be added here shouldn't extend beyond what + * POSIX mandates (see e.g. + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html) + * with the exception that we support some optional (XSR) values + * specified there (but no new ones should be added). 
+ */ + +XEN_ERRNO(EPERM, 1) /* Operation not permitted */ +XEN_ERRNO(ENOENT, 2) /* No such file or directory */ +XEN_ERRNO(ESRCH, 3) /* No such process */ +#ifdef __XEN__ /* Internal only, should never be exposed to the guest. */ +XEN_ERRNO(EINTR, 4) /* Interrupted system call */ +#endif +XEN_ERRNO(EIO, 5) /* I/O error */ +XEN_ERRNO(ENXIO, 6) /* No such device or address */ +XEN_ERRNO(E2BIG, 7) /* Arg list too long */ +XEN_ERRNO(ENOEXEC, 8) /* Exec format error */ +XEN_ERRNO(EBADF, 9) /* Bad file number */ +XEN_ERRNO(ECHILD, 10) /* No child processes */ +XEN_ERRNO(EAGAIN, 11) /* Try again */ +XEN_ERRNO(ENOMEM, 12) /* Out of memory */ +XEN_ERRNO(EACCES, 13) /* Permission denied */ +XEN_ERRNO(EFAULT, 14) /* Bad address */ +XEN_ERRNO(EBUSY, 16) /* Device or resource busy */ +XEN_ERRNO(EEXIST, 17) /* File exists */ +XEN_ERRNO(EXDEV, 18) /* Cross-device link */ +XEN_ERRNO(ENODEV, 19) /* No such device */ +XEN_ERRNO(EINVAL, 22) /* Invalid argument */ +XEN_ERRNO(ENFILE, 23) /* File table overflow */ +XEN_ERRNO(EMFILE, 24) /* Too many open files */ +XEN_ERRNO(ENOSPC, 28) /* No space left on device */ +XEN_ERRNO(EMLINK, 31) /* Too many links */ +XEN_ERRNO(EDOM, 33) /* Math argument out of domain of func */ +XEN_ERRNO(ERANGE, 34) /* Math result not representable */ +XEN_ERRNO(EDEADLK, 35) /* Resource deadlock would occur */ +XEN_ERRNO(ENAMETOOLONG, 36) /* File name too long */ +XEN_ERRNO(ENOLCK, 37) /* No record locks available */ +XEN_ERRNO(ENOSYS, 38) /* Function not implemented */ +XEN_ERRNO(ENODATA, 61) /* No data available */ +XEN_ERRNO(ETIME, 62) /* Timer expired */ +XEN_ERRNO(EBADMSG, 74) /* Not a data message */ +XEN_ERRNO(EOVERFLOW, 75) /* Value too large for defined data type */ +XEN_ERRNO(EILSEQ, 84) /* Illegal byte sequence */ +#ifdef __XEN__ /* Internal only, should never be exposed to the guest. 
*/ +XEN_ERRNO(ERESTART, 85) /* Interrupted system call should be restarted */ +#endif +XEN_ERRNO(ENOTSOCK, 88) /* Socket operation on non-socket */ +XEN_ERRNO(EOPNOTSUPP, 95) /* Operation not supported on transport endpoint */ +XEN_ERRNO(EADDRINUSE, 98) /* Address already in use */ +XEN_ERRNO(EADDRNOTAVAIL, 99) /* Cannot assign requested address */ +XEN_ERRNO(ENOBUFS, 105) /* No buffer space available */ +XEN_ERRNO(EISCONN, 106) /* Transport endpoint is already connected */ +XEN_ERRNO(ENOTCONN, 107) /* Transport endpoint is not connected */ +XEN_ERRNO(ETIMEDOUT, 110) /* Connection timed out */ + +#undef XEN_ERRNO +#endif /* XEN_ERRNO */ + +#ifndef __XEN_PUBLIC_ERRNO_H__ +#define __XEN_PUBLIC_ERRNO_H__ + +/* ` } */ + +#ifndef __ASSEMBLY__ +}; +#endif + +#define XEN_EWOULDBLOCK XEN_EAGAIN /* Operation would block */ +#define XEN_EDEADLOCK XEN_EDEADLK /* Resource deadlock would occur */ + +#endif /* __XEN_PUBLIC_ERRNO_H__ */ Property changes on: projects/clang370-import/sys/xen/interface/errno.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/event_channel.h =================================================================== --- projects/clang370-import/sys/xen/interface/event_channel.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/event_channel.h (revision 288926) @@ -1,297 +1,385 @@ /****************************************************************************** * event_channel.h * * Event channels between domains. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2003-2004, K A Fraser. */ #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ #include "xen.h" /* * `incontents 150 evtchn Event Channels * * Event channels are the basic primitive provided by Xen for event * notifications. An event is the Xen equivalent of a hardware * interrupt. They essentially store one bit of information, the event * of interest is signalled by transitioning this bit from 0 to 1. * * Notifications are received by a guest via an upcall from Xen, * indicating when an event arrives (setting the bit). Further * notifications are masked until the bit is cleared again (therefore, * guests must check the value of the bit after re-enabling event * delivery to ensure no missed notifications). * * Event notifications can be masked by setting a flag; this is * equivalent to disabling interrupts and can be used to ensure * atomicity of certain operations in the guest kernel. 
* * Event channels are represented by the evtchn_* fields in * struct shared_info and struct vcpu_info. */ /* * ` enum neg_errnoval * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) * ` * @cmd == EVTCHNOP_* (event-channel operation). * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). */ /* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ #define EVTCHNOP_bind_interdomain 0 #define EVTCHNOP_bind_virq 1 #define EVTCHNOP_bind_pirq 2 #define EVTCHNOP_close 3 #define EVTCHNOP_send 4 #define EVTCHNOP_status 5 #define EVTCHNOP_alloc_unbound 6 #define EVTCHNOP_bind_ipi 7 #define EVTCHNOP_bind_vcpu 8 #define EVTCHNOP_unmask 9 #define EVTCHNOP_reset 10 +#define EVTCHNOP_init_control 11 +#define EVTCHNOP_expand_array 12 +#define EVTCHNOP_set_priority 13 /* ` } */ -#ifndef __XEN_EVTCHN_PORT_DEFINED__ typedef uint32_t evtchn_port_t; DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); -#define __XEN_EVTCHN_PORT_DEFINED__ 1 -#endif /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as * accepting interdomain bindings from domain . A fresh port * is allocated in and returned as . * NOTES: * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; /* OUT parameters */ evtchn_port_t port; }; typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between * the calling domain and . must identify * a port that is unbound and marked as accepting bindings from the calling * domain. A fresh port is allocated in the calling domain and returned as * . + * + * In case the peer domain has already tried to set our event channel + * pending, before it was bound, EVTCHNOP_bind_interdomain always sets + * the local event channel pending. 
+ * + * The usual pattern of use, in the guest's upcall (or subsequent + * handler) is as follows: (Re-enable the event channel for subsequent + * signalling and then) check for the existence of whatever condition + * is being waited for by other means, and take whatever action is + * needed (if any). + * * NOTES: * 1. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; evtchn_port_t remote_port; /* OUT parameters. */ evtchn_port_t local_port; }; typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified * vcpu. * NOTES: * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list * in xen.h for the classification of each VIRQ. * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be * re-bound via EVTCHNOP_bind_vcpu. * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. * The allocated event channel is bound to the specified vcpu and the * binding cannot be changed. */ struct evtchn_bind_virq { /* IN parameters. */ uint32_t virq; /* enum virq */ uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ ). * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 uint32_t flags; /* BIND_PIRQ__* */ /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. * NOTES: * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ struct evtchn_bind_ipi { uint32_t vcpu; /* OUT parameters. 
*/ evtchn_port_t port; }; typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; /* * EVTCHNOP_close: Close a local event channel . If the channel is * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ struct evtchn_close { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_close evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ struct evtchn_send { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_send evtchn_send_t; /* * EVTCHNOP_status: Get the current status of the communication channel which * has an endpoint at . * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. */ struct evtchn_status { /* IN parameters */ domid_t dom; evtchn_port_t port; /* OUT parameters */ #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ #define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ uint32_t status; uint32_t vcpu; /* VCPU to which this channel is bound. */ union { struct { domid_t dom; } unbound; /* EVTCHNSTAT_unbound */ struct { domid_t dom; evtchn_port_t port; } interdomain; /* EVTCHNSTAT_interdomain */ uint32_t pirq; /* EVTCHNSTAT_pirq */ uint32_t virq; /* EVTCHNSTAT_virq */ } u; }; typedef struct evtchn_status evtchn_status_t; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: * 1. IPI-bound channels always notify the vcpu specified at bind time. * This binding cannot be changed. * 2. 
Per-VCPU VIRQ channels always notify the vcpu specified at bind time. * This binding cannot be changed. * 3. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ struct evtchn_bind_vcpu { /* IN parameters. */ evtchn_port_t port; uint32_t vcpu; }; typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ struct evtchn_unmask { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_unmask evtchn_unmask_t; /* * EVTCHNOP_reset: Close all event channels associated with specified domain. * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + * 3. Destroys all control blocks and event array, resets event channel + * operations to 2-level ABI if called with == DOMID_SELF and FIFO + * ABI was used. Guests should not bind events during EVTCHNOP_reset call + * as these events are likely to be lost. */ struct evtchn_reset { /* IN parameters. */ domid_t dom; }; typedef struct evtchn_reset evtchn_reset_t; /* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + * + * Note: any events that are currently pending will not be resent and + * will be lost. Guests should call this before binding any event to + * avoid losing any events. + */ +struct evtchn_init_control { + /* IN parameters. */ + uint64_t control_gfn; + uint32_t offset; + uint32_t vcpu; + /* OUT parameters. */ + uint8_t link_bits; + uint8_t _pad[7]; +}; +typedef struct evtchn_init_control evtchn_init_control_t; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +struct evtchn_expand_array { + /* IN parameters. 
*/ + uint64_t array_gfn; +}; +typedef struct evtchn_expand_array evtchn_expand_array_t; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +struct evtchn_set_priority { + /* IN parameters. */ + uint32_t port; + uint32_t priority; +}; +typedef struct evtchn_set_priority evtchn_set_priority_t; + +/* * ` enum neg_errnoval * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) * ` * Superceded by new event_channel_op() hypercall since 0x00030202. */ struct evtchn_op { uint32_t cmd; /* enum event_channel_op */ union { struct evtchn_alloc_unbound alloc_unbound; struct evtchn_bind_interdomain bind_interdomain; struct evtchn_bind_virq bind_virq; struct evtchn_bind_pirq bind_pirq; struct evtchn_bind_ipi bind_ipi; struct evtchn_close close; struct evtchn_send send; struct evtchn_status status; struct evtchn_bind_vcpu bind_vcpu; struct evtchn_unmask unmask; } u; }; typedef struct evtchn_op evtchn_op_t; DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). 
*/ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef uint32_t event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + uint32_t ready; + uint32_t _rsvd; + uint32_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; +typedef struct evtchn_fifo_control_block evtchn_fifo_control_block_t; + #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/features.h =================================================================== --- projects/clang370-import/sys/xen/interface/features.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/features.h (revision 288926) @@ -1,95 +1,117 @@ /****************************************************************************** * features.h * * Feature flags, reported by XENVER_get_features. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2006, Keir Fraser */ #ifndef __XEN_PUBLIC_FEATURES_H__ #define __XEN_PUBLIC_FEATURES_H__ /* + * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES + * + * The list of all the features the guest supports. They are set by + * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES + * string. The format is the feature names (as given here without the + * "XENFEAT_" prefix) separated by '|' characters. + * If a feature is required for the kernel to function then the feature name + * must be preceded by a '!' character. + * + * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then + * XENFEAT_dom0 MUST be set if the guest is to be booted as dom0. + */ + +/* * If set, the guest does not need to write-protect its pagetables, and can * update them via direct writes. */ #define XENFEAT_writable_page_tables 0 /* * If set, the guest does not need to write-protect its segment descriptor * tables, and can update them via direct writes. */ #define XENFEAT_writable_descriptor_tables 1 /* * If set, translation between the guest's 'pseudo-physical' address space * and the host's machine address space are handled by the hypervisor. In this * mode the guest does not need to perform phys-to/from-machine translations * when performing page table operations. */ #define XENFEAT_auto_translated_physmap 2 /* If set, the guest is running in supervisor mode (e.g., x86 ring 0).
*/ #define XENFEAT_supervisor_mode_kernel 3 /* * If set, the guest does not need to allocate x86 PAE page directories * below 4GB. This flag is usually implied by auto_translated_physmap. */ #define XENFEAT_pae_pgdir_above_4gb 4 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 /* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ #define XENFEAT_highmem_assist 6 /* * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel * available pte bits. */ #define XENFEAT_gnttab_map_avail_bits 7 /* x86: Does this Xen host support the HVM callback vector type? */ #define XENFEAT_hvm_callback_vector 8 /* x86: pvclock algorithm is safe to use on HVM */ #define XENFEAT_hvm_safe_pvclock 9 /* x86: pirq can be used by HVM guests */ #define XENFEAT_hvm_pirqs 10 /* operation as Dom0 is supported */ #define XENFEAT_dom0 11 +/* Xen also maps grant references at pfn = mfn. + * This feature flag is deprecated and should not be used. +#define XENFEAT_grant_map_identity 12 + */ + +/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */ +#define XENFEAT_memory_op_vnode_supported 13 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/gcov.h =================================================================== --- projects/clang370-import/sys/xen/interface/gcov.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/gcov.h (revision 288926) @@ -0,0 +1,115 @@ +/****************************************************************************** + * gcov.h + * + * Coverage structures exported by Xen. + * Structure is different from Gcc one. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2013, Citrix Systems R&D Ltd. + */ + +#ifndef __XEN_PUBLIC_GCOV_H__ +#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__ + +#define XENCOV_COUNTERS 5 +#define XENCOV_TAG_BASE 0x58544300u +#define XENCOV_TAG_FILE (XENCOV_TAG_BASE+0x46u) +#define XENCOV_TAG_FUNC (XENCOV_TAG_BASE+0x66u) +#define XENCOV_TAG_COUNTER(n) (XENCOV_TAG_BASE+0x30u+((n)&0xfu)) +#define XENCOV_TAG_END (XENCOV_TAG_BASE+0x2eu) +#define XENCOV_IS_TAG_COUNTER(n) \ + ((n) >= XENCOV_TAG_COUNTER(0) && (n) < XENCOV_TAG_COUNTER(XENCOV_COUNTERS)) +#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0)) + +/* + * The main structure for the blob is + * BLOB := FILE.. END + * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS + * FILENAME := LEN characters + * characters are padded to 32 bit + * LEN := 32 bit value + * COUNTERS := TAG_COUNTER(n) NUM COUNTER.. 
+ * NUM := 32 bit value + * COUNTER := 64 bit value + * FUNCTIONS := TAG_FUNC NUM FUNCTION.. + * FUNCTION := IDENT CHECKSUM NUM_COUNTERS + * + * All tagged structures are aligned to 8 bytes + */ + +/** + * File information + * Prefixed with XENCOV_TAG_FILE and a string with filename + * Aligned to 8 bytes + */ +struct xencov_file +{ + uint32_t tag; /* XENCOV_TAG_FILE */ + uint32_t version; + uint32_t stamp; + uint32_t fn_len; + char filename[1]; +}; + + +/** + * Counters information + * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1) + * Aligned to 8 bytes + */ +struct xencov_counter +{ + uint32_t tag; /* XENCOV_TAG_COUNTER(n) */ + uint32_t num; + uint64_t values[1]; +}; + +/** + * Information for each function + * Number of counter is equal to the number of counter structures got before + */ +struct xencov_function +{ + uint32_t ident; + uint32_t checksum; + uint32_t num_counters[1]; +}; + +/** + * Information for all functions + * Aligned to 8 bytes + */ +struct xencov_functions +{ + uint32_t tag; /* XENCOV_TAG_FUNC */ + uint32_t num; + struct xencov_function xencov_function[1]; +}; + +/** + * Terminator + */ +struct xencov_end +{ + uint32_t tag; /* XENCOV_TAG_END */ +}; + +#endif /* __XEN_PUBLIC_GCOV_H__ */ + Property changes on: projects/clang370-import/sys/xen/interface/gcov.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/grant_table.h =================================================================== --- projects/clang370-import/sys/xen/interface/grant_table.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/grant_table.h (revision 288926) @@ -1,660 +1,684 @@ /****************************************************************************** * grant_table.h * * Interface for granting foreign access to page frames, and receiving * page-ownership transfers.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2004, K A Fraser */ #ifndef __XEN_PUBLIC_GRANT_TABLE_H__ #define __XEN_PUBLIC_GRANT_TABLE_H__ #include "xen.h" /* * `incontents 150 gnttab Grant Tables * * Xen's grant tables provide a generic mechanism to memory sharing * between domains. This shared memory interface underpins the split * device drivers for block and network IO. * * Each domain has its own grant table. This is a data structure that * is shared with Xen; it allows the domain to tell Xen what kind of * permissions other domains have on its pages. Entries in the grant * table are identified by grant references. A grant reference is an * integer, which indexes into the grant table. It acts as a * capability which the grantee can use to perform operations on the * granter’s memory. * * This capability-based system allows shared-memory communications * between unprivileged domains. 
A grant reference also encapsulates * the details of a shared page, removing the need for a domain to * know the real machine address of a page it is sharing. This makes * it possible to share memory correctly with domains running in * fully virtualised memory. */ /*********************************** * GRANT TABLE REPRESENTATION */ /* Some rough guidelines on accessing and updating grant-table entries * in a concurrency-safe manner. For more information, Linux contains a * reference implementation for guest OSes (drivers/xen/grant_table.c, see * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD * * NB. WMB is a no-op on current-generation x86 processors. However, a * compiler barrier will still be required. * * Introducing a valid entry into the grant table: * 1. Write ent->domid. * 2. Write ent->frame: * GTF_permit_access: Frame to which access is permitted. * GTF_accept_transfer: Pseudo-phys frame slot being filled by new * frame, or zero if none. * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. * * Invalidating an unused GTF_permit_access entry: * 1. flags = ent->flags. * 2. Observe that !(flags & (GTF_reading|GTF_writing)). * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). * NB. No need for WMB as reuse of entry is control-dependent on success of * step 3, and all architectures guarantee ordering of ctrl-dep writes. * * Invalidating an in-use GTF_permit_access entry: * This cannot be done directly. Request assistance from the domain controller * which can set a timeout on the use of a grant entry and take necessary * action. (NB. This is not yet implemented!). * * Invalidating an unused GTF_accept_transfer entry: * 1. flags = ent->flags. * 2. Observe that !(flags & GTF_transfer_committed). [*] * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). * NB. 
No need for WMB as reuse of entry is control-dependent on success of * step 3, and all architectures guarantee ordering of ctrl-dep writes. * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. * The guest must /not/ modify the grant entry until the address of the * transferred frame is written. It is safe for the guest to spin waiting * for this to occur (detect by observing GTF_transfer_completed in * ent->flags). * * Invalidating a committed GTF_accept_transfer entry: * 1. Wait for (ent->flags & GTF_transfer_completed). * * Changing a GTF_permit_access from writable to read-only: * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. * * Changing a GTF_permit_access from read-only to writable: * Use SMP-safe bit-setting instruction. */ /* * Reference to a grant entry in a specified domain's grant table. */ typedef uint32_t grant_ref_t; /* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ /* * Version 1 of the grant table entry structure is maintained purely * for backwards compatibility. New guests should use version 2. */ #if __XEN_INTERFACE_VERSION__ < 0x0003020a #define grant_entry_v1 grant_entry #define grant_entry_v1_t grant_entry_t #endif struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. [GST] */ domid_t domid; /* - * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] - * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + * GTF_permit_access: GFN that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: GFN that @domid is allowed to transfer into. [GST] + * GTF_transfer_completed: MFN whose ownership transferred by @domid + * (non-translated guests only). 
[XEN] */ uint32_t frame; }; typedef struct grant_entry_v1 grant_entry_v1_t; /* The first few grant table entries will be preserved across grant table * version changes and may be pre-populated at domain creation by tools. */ #define GNTTAB_NR_RESERVED_ENTRIES 8 #define GNTTAB_RESERVED_CONSOLE 0 #define GNTTAB_RESERVED_XENSTORE 1 /* * Type of grant entry. * GTF_invalid: This grant entry grants no privileges. * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. * GTF_transitive: Allow @domid to transitively access a subrange of * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) #define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* * Subflags for GTF_permit_access. * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] * GTF_sub_page: Grant access to only a subrange of the page. @domid * will only be allowed to copy from the grant, and not * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) #define _GTF_reading (3) #define GTF_reading (1U<<_GTF_reading) #define _GTF_writing (4) #define GTF_writing (1U<<_GTF_writing) #define _GTF_PWT (5) #define GTF_PWT (1U<<_GTF_PWT) #define _GTF_PCD (6) #define GTF_PCD (1U<<_GTF_PCD) #define _GTF_PAT (7) #define GTF_PAT (1U<<_GTF_PAT) #define _GTF_sub_page (8) #define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: * GTF_transfer_committed: Xen sets this flag to indicate that it is committed * to transferring ownership of a page frame. 
When a guest sees this flag * it must /not/ modify the grant entry until GTF_transfer_completed is * set by Xen. * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag * after reading GTF_transfer_committed. Xen will always write the frame * address, followed by ORing this flag, in a timely manner. */ #define _GTF_transfer_committed (2) #define GTF_transfer_committed (1U<<_GTF_transfer_committed) #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) /* * Version 2 grant table entries. These fulfil the same role as * version 1 entries, but can represent more complicated operations. * Any given domain will have either a version 1 or a version 2 table, * and every entry in the table will be the same version. * * The interface by which domains use grant references does not depend * on the grant table version in use by the other domain. */ #if __XEN_INTERFACE_VERSION__ >= 0x0003020a /* * Version 1 and version 2 grant entries share a common prefix. The * fields of the prefix are documented as part of struct * grant_entry_v1. */ struct grant_entry_header { uint16_t flags; domid_t domid; }; typedef struct grant_entry_header grant_entry_header_t; /* * Version 2 of the grant entry structure. */ union grant_entry_v2 { grant_entry_header_t hdr; /* * This member is used for V1-style full page grants, where either: * * -- hdr.type is GTF_accept_transfer, or * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. * * In that case, the frame field has the same semantics as the * field of the same name in the V1 entry structure. */ struct { grant_entry_header_t hdr; uint32_t pad0; uint64_t frame; } full_page; /* * If the grant type is GTF_grant_access and GTF_sub_page is set, * @domid is allowed to access bytes [@page_off,@page_off+@length) * in frame @frame. 
*/ struct { grant_entry_header_t hdr; uint16_t page_off; uint16_t length; uint64_t frame; } sub_page; /* * If the grant is GTF_transitive, @domid is allowed to use the * grant @gref in domain @trans_domid, as if it was the local * domain. Obviously, the transitive access must be compatible * with the original grant. * * The current version of Xen does not allow transitive grants * to be mapped. */ struct { grant_entry_header_t hdr; domid_t trans_domid; uint16_t pad0; grant_ref_t gref; } transitive; uint32_t __spacer[4]; /* Pad to a power of two */ }; typedef union grant_entry_v2 grant_entry_v2_t; typedef uint16_t grant_status_t; #endif /* __XEN_INTERFACE_VERSION__ */ /*********************************** * GRANT TABLE QUERIES AND USES */ /* ` enum neg_errnoval * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, * ` void *args, * ` unsigned int count) * ` * * @args points to an array of a per-command data structure. The array * has @count members */ /* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ #define GNTTABOP_map_grant_ref 0 #define GNTTABOP_unmap_grant_ref 1 #define GNTTABOP_setup_table 2 #define GNTTABOP_dump_table 3 #define GNTTABOP_transfer 4 #define GNTTABOP_copy 5 #define GNTTABOP_query_size 6 #define GNTTABOP_unmap_and_replace 7 #if __XEN_INTERFACE_VERSION__ >= 0x0003020a #define GNTTABOP_set_version 8 #define GNTTABOP_get_status_frames 9 #define GNTTABOP_get_version 10 #define GNTTABOP_swap_grant_ref 11 +#define GNTTABOP_cache_flush 12 #endif /* __XEN_INTERFACE_VERSION__ */ /* ` } */ /* * Handle to track a mapping created via a grant reference. */ typedef uint32_t grant_handle_t; /* * GNTTABOP_map_grant_ref: Map the grant entry (,) for access * by devices and/or host CPUs. If successful, is a tracking number - * that must be presented later to destroy the mapping(s). On error, + * that must be presented later to destroy the mapping(s). On error, * is a negative status code. * NOTES: * 1. 
If GNTMAP_device_map is specified then is the address * via which I/O devices may access the granted frame. * 2. If GNTMAP_host_map is specified then a mapping will be added at * either a host virtual address in the current address space, or at * a PTE at the specified machine address. The type of mapping to * perform is selected through the GNTMAP_contains_pte flag, and the * address is specified in . * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a * host mapping is destroyed by other means then it is *NOT* guaranteed * to be accounted to the correct grant reference! */ struct gnttab_map_grant_ref { /* IN parameters. */ uint64_t host_addr; uint32_t flags; /* GNTMAP_* */ grant_ref_t ref; domid_t dom; /* OUT parameters. */ int16_t status; /* => enum grant_status */ grant_handle_t handle; uint64_t dev_bus_addr; }; typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); /* * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings * tracked by . If or is zero, that * field is ignored. If non-zero, they must refer to a device/host mapping * that is tracked by * NOTES: * 1. The call may fail in an undefined manner if either mapping is not * tracked by . * 3. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ struct gnttab_unmap_grant_ref { /* IN parameters. */ uint64_t host_addr; uint64_t dev_bus_addr; grant_handle_t handle; /* OUT parameters. */ int16_t status; /* => enum grant_status */ }; typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); /* * GNTTABOP_setup_table: Set up a grant table for comprising at least * pages. The frame addresses are written to the . * Only addresses are written, even if the table is larger. * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. * 3. 
Xen may not support more than a single grant-table page per domain. */ struct gnttab_setup_table { /* IN parameters. */ domid_t dom; uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* => enum grant_status */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 XEN_GUEST_HANDLE(ulong) frame_list; +#else + XEN_GUEST_HANDLE(xen_pfn_t) frame_list; +#endif }; typedef struct gnttab_setup_table gnttab_setup_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); /* * GNTTABOP_dump_table: Dump the contents of the grant table to the * xen console. Debugging use only. */ struct gnttab_dump_table { /* IN parameters. */ domid_t dom; /* OUT parameters. */ int16_t status; /* => enum grant_status */ }; typedef struct gnttab_dump_table gnttab_dump_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); /* * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The * foreign domain has previously registered its interest in the transfer via * . * * Note that, even if the transfer fails, the specified page no longer belongs * to the calling domain *unless* the error is GNTST_bad_page. */ struct gnttab_transfer { /* IN parameters. */ xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ int16_t status; }; typedef struct gnttab_transfer gnttab_transfer_t; DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); /* * GNTTABOP_copy: Hypervisor based copy * source and destinations can be eithers MFNs or, for foreign domains, * grant references. the foreign domain has to grant read/write access * in its grant table. * * The flags specify what type source and destinations are (either MFN * or grant reference). * * Note that this can also be used to copy data between two domains * via a third party if the source and destination domains had previously * grant appropriate access to their pages to the third party. * * source_offset specifies an offset in the source frame, dest_offset * the offset in the target frame and len specifies the number of * bytes to be copied. 
*/ #define _GNTCOPY_source_gref (0) #define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) #define _GNTCOPY_dest_gref (1) #define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) -#define _GNTCOPY_can_fail (2) -#define GNTCOPY_can_fail (1<<_GNTCOPY_can_fail) struct gnttab_copy { /* IN parameters. */ - struct { + struct gnttab_copy_ptr { union { grant_ref_t ref; xen_pfn_t gmfn; } u; domid_t domid; uint16_t offset; } source, dest; uint16_t len; uint16_t flags; /* GNTCOPY_* */ /* OUT parameters. */ int16_t status; }; typedef struct gnttab_copy gnttab_copy_t; DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); /* * GNTTABOP_query_size: Query the current and maximum sizes of the shared * grant table. * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. */ struct gnttab_query_size { /* IN parameters. */ domid_t dom; /* OUT parameters. */ uint32_t nr_frames; uint32_t max_nr_frames; int16_t status; /* => enum grant_status */ }; typedef struct gnttab_query_size gnttab_query_size_t; DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); /* * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings * tracked by but atomically replace the page table entry with one * pointing to the machine address under . will be * redirected to the null entry. * NOTES: * 1. The call may fail in an undefined manner if either mapping is not * tracked by . * 2. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ struct gnttab_unmap_and_replace { /* IN parameters. */ uint64_t host_addr; uint64_t new_addr; grant_handle_t handle; /* OUT parameters. */ int16_t status; /* => enum grant_status */ }; typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); #if __XEN_INTERFACE_VERSION__ >= 0x0003020a /* * GNTTABOP_set_version: Request a particular version of the grant * table shared table structure. 
This operation can only be performed * once in any given domain. It must be performed before any grants * are activated; otherwise, the domain will be stuck with version 1. * The only defined versions are 1 and 2. */ struct gnttab_set_version { /* IN/OUT parameters */ uint32_t version; }; typedef struct gnttab_set_version gnttab_set_version_t; DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); /* * GNTTABOP_get_status_frames: Get the list of frames used to store grant * status for . In grant format version 2, the status is separated * from the other shared grant fields to allow more efficient synchronization * using barriers instead of atomic cmpexch operations. * specify the size of vector . * The frame addresses are returned in the . * Only addresses are returned, even if the table is larger. * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. */ struct gnttab_get_status_frames { /* IN parameters. */ uint32_t nr_frames; domid_t dom; /* OUT parameters. */ int16_t status; /* => enum grant_status */ XEN_GUEST_HANDLE(uint64_t) frame_list; }; typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); /* * GNTTABOP_get_version: Get the grant table version which is in * effect for domain . */ struct gnttab_get_version { /* IN parameters */ domid_t dom; uint16_t pad; /* OUT parameters */ uint32_t version; }; typedef struct gnttab_get_version gnttab_get_version_t; DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); /* * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. 
*/ struct gnttab_swap_grant_ref { /* IN parameters */ grant_ref_t ref_a; grant_ref_t ref_b; /* OUT parameters */ int16_t status; /* => enum grant_status */ }; typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); +/* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +struct gnttab_cache_flush { + union { + uint64_t dev_bus_addr; + grant_ref_t ref; + } a; + uint16_t offset; /* offset from start of grant */ + uint16_t length; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1<<0) +#define GNTTAB_CACHE_INVAL (1<<1) +#define GNTTAB_CACHE_SOURCE_GREF (1<<31) + uint32_t op; +}; +typedef struct gnttab_cache_flush gnttab_cache_flush_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t); + #endif /* __XEN_INTERFACE_VERSION__ */ /* * Bitfield values for gnttab_map_grant_ref.flags. */ /* Map the grant entry for access by I/O devices. */ #define _GNTMAP_device_map (0) #define GNTMAP_device_map (1<<_GNTMAP_device_map) /* Map the grant entry for access by host CPUs. */ #define _GNTMAP_host_map (1) #define GNTMAP_host_map (1<<_GNTMAP_host_map) /* Accesses to the granted frame will be restricted to read-only access. */ #define _GNTMAP_readonly (2) #define GNTMAP_readonly (1<<_GNTMAP_readonly) /* * GNTMAP_host_map subflag: * 0 => The host mapping is usable only by the guest OS. * 1 => The host mapping is usable by guest OS + current application. */ #define _GNTMAP_application_map (3) #define GNTMAP_application_map (1<<_GNTMAP_application_map) /* * GNTMAP_contains_pte subflag: * 0 => This map request contains a host virtual address. * 1 => This map request contains the machine addess of the PTE to update. 
*/ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) #define _GNTMAP_can_fail (5) #define GNTMAP_can_fail (1<<_GNTMAP_can_fail) /* * Bits to be placed in guest kernel available PTE bits (architecture * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). */ #define _GNTMAP_guest_avail0 (16) #define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) /* * Values for error status returns. All errors are -ve. */ /* ` enum grant_status { */ #define GNTST_okay (0) /* Normal return. */ #define GNTST_general_error (-1) /* General undefined error. */ #define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ #define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ #define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ #define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ #define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ #define GNTST_address_too_big (-11) /* transfer page address too large. */ #define GNTST_eagain (-12) /* Operation not done; try again. 
*/ /* ` } */ #define GNTTABOP_error_msgs { \ "okay", \ "undefined error", \ "unrecognised domain id", \ "invalid grant reference", \ "invalid mapping handle", \ "invalid virtual address", \ "invalid device address", \ "no spare translation slot in the I/O MMU", \ "permission denied", \ "bad page", \ "copy arguments cross page boundary", \ "page address size too large", \ "operation not done; try again" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/hvm/e820.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/e820.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/hvm/e820.h (revision 288926) @@ -1,34 +1,35 @@ - /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2006, Keir Fraser */ #ifndef __XEN_PUBLIC_HVM_E820_H__ #define __XEN_PUBLIC_HVM_E820_H__ /* E820 location in HVM virtual address space. */ #define HVM_E820_PAGE 0x00090000 #define HVM_E820_NR_OFFSET 0x000001E8 #define HVM_E820_OFFSET 0x000002D0 #define HVM_BELOW_4G_RAM_END 0xF0000000 #define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END #define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) #endif /* __XEN_PUBLIC_HVM_E820_H__ */ Index: projects/clang370-import/sys/xen/interface/hvm/hvm_info_table.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/hvm_info_table.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/hvm/hvm_info_table.h (revision 288926) @@ -1,72 +1,74 @@ /****************************************************************************** * hvm/hvm_info_table.h * * HVM parameter and information table, written into guest memory map. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser */ #ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ #define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ #define HVM_INFO_PFN 0x09F #define HVM_INFO_OFFSET 0x800 #define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) /* Maximum we can support with current vLAPIC ID mapping. */ #define HVM_MAX_VCPUS 128 struct hvm_info_table { char signature[8]; /* "HVM INFO" */ uint32_t length; uint8_t checksum; /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ uint8_t apic_mode; /* How many CPUs does this domain have? */ uint32_t nr_vcpus; /* * MEMORY MAP provided by HVM domain builder. * Notes: * 1. page_to_phys(x) = x << 12 * 2. If a field is zero, the corresponding range does not exist. */ /* * 0x0 to page_to_phys(low_mem_pgend)-1: * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) */ uint32_t low_mem_pgend; /* * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: * Reserved for special memory mappings */ uint32_t reserved_mem_pgstart; /* * 0x100000000 to page_to_phys(high_mem_pgend)-1: * RAM above 4GB */ uint32_t high_mem_pgend; /* Bitmap of which CPUs are online at boot time. 
*/ uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8]; }; #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ Index: projects/clang370-import/sys/xen/interface/hvm/hvm_op.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/hvm_op.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/hvm/hvm_op.h (revision 288926) @@ -1,275 +1,502 @@ /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007, Keir Fraser */ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ #include "../xen.h" #include "../trace.h" +#include "../event_channel.h" /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. 
*/ #define HVMOP_set_param 0 #define HVMOP_get_param 1 struct xen_hvm_param { domid_t domid; /* IN */ uint32_t index; /* IN */ uint64_t value; /* IN/OUT */ }; typedef struct xen_hvm_param xen_hvm_param_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); /* Set the logical level of one of a domain's PCI INTx wires. */ #define HVMOP_set_pci_intx_level 2 struct xen_hvm_set_pci_intx_level { /* Domain to be updated. */ domid_t domid; /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ uint8_t domain, bus, device, intx; /* Assertion level (0 = unasserted, 1 = asserted). */ uint8_t level; }; typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); /* Set the logical level of one of a domain's ISA IRQ wires. */ #define HVMOP_set_isa_irq_level 3 struct xen_hvm_set_isa_irq_level { /* Domain to be updated. */ domid_t domid; /* ISA device identification, by ISA IRQ (0-15). */ uint8_t isa_irq; /* Assertion level (0 = unasserted, 1 = asserted). */ uint8_t level; }; typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); #define HVMOP_set_pci_link_route 4 struct xen_hvm_set_pci_link_route { /* Domain to be updated. */ domid_t domid; /* PCI link identifier (0-3). */ uint8_t link; /* ISA IRQ (1-15), or 0 (disable link). */ uint8_t isa_irq; }; typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); /* Flushes all VCPU TLBs: @arg must be NULL. */ #define HVMOP_flush_tlbs 5 typedef enum { HVMMEM_ram_rw, /* Normal read/write guest RAM */ HVMMEM_ram_ro, /* Read-only; writes are discarded */ HVMMEM_mmio_dm, /* Reads and write go to the device model */ + HVMMEM_mmio_write_dm /* Read-only; writes go to the device model */ } hvmmem_type_t; /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Track dirty VRAM. 
*/ #define HVMOP_track_dirty_vram 6 struct xen_hvm_track_dirty_vram { /* Domain to be tracked. */ domid_t domid; + /* Number of pages to track. */ + uint32_t nr; /* First pfn to track. */ uint64_aligned_t first_pfn; - /* Number of pages to track. */ - uint64_aligned_t nr; /* OUT variable. */ /* Dirty bitmap buffer. */ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; }; typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); /* Notify that some pages got modified by the Device Model. */ #define HVMOP_modified_memory 7 struct xen_hvm_modified_memory { /* Domain to be updated. */ domid_t domid; + /* Number of pages. */ + uint32_t nr; /* First pfn. */ uint64_aligned_t first_pfn; - /* Number of pages. */ - uint64_aligned_t nr; }; typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); #define HVMOP_set_mem_type 8 /* Notify that a region of memory is to be treated in a specific way. */ struct xen_hvm_set_mem_type { /* Domain to be updated. */ domid_t domid; /* Memory type */ uint16_t hvmmem_type; /* Number of pages. */ uint32_t nr; /* First pfn. */ uint64_aligned_t first_pfn; }; typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ /* Hint from PV drivers for pagetable destruction. */ #define HVMOP_pagetable_dying 9 struct xen_hvm_pagetable_dying { /* Domain with a pagetable about to be destroyed. */ domid_t domid; uint16_t pad[3]; /* align next field on 8-byte boundary */ /* guest physical address of the toplevel pagetable dying */ uint64_t gpa; }; typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); /* Get the current Xen time, in nanoseconds since system boot. 
*/ #define HVMOP_get_time 10 struct xen_hvm_get_time { uint64_t now; /* OUT */ }; typedef struct xen_hvm_get_time xen_hvm_get_time_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t); #define HVMOP_xentrace 11 struct xen_hvm_xentrace { uint16_t event, extra_bytes; uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)]; }; typedef struct xen_hvm_xentrace xen_hvm_xentrace_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Deprecated by XENMEM_access_op_set_access */ #define HVMOP_set_mem_access 12 -typedef enum { - HVMMEM_access_n, - HVMMEM_access_r, - HVMMEM_access_w, - HVMMEM_access_rw, - HVMMEM_access_x, - HVMMEM_access_rx, - HVMMEM_access_wx, - HVMMEM_access_rwx, - HVMMEM_access_rx2rw, /* Page starts off as r-x, but automatically - * change to r-w on a write */ - HVMMEM_access_n2rwx, /* Log access: starts off as n, automatically - * goes to rwx, generating an event without - * pausing the vcpu */ - HVMMEM_access_default /* Take the domain default */ -} hvmmem_access_t; -/* Notify that a region of memory is to have specific access types */ -struct xen_hvm_set_mem_access { - /* Domain to be updated. */ - domid_t domid; - /* Memory type */ - uint16_t hvmmem_access; /* hvm_access_t */ - /* Number of pages, ignored on setting default access */ - uint32_t nr; - /* First pfn, or ~0ull to set the default access for new pages */ - uint64_aligned_t first_pfn; -}; -typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t); +/* Deprecated by XENMEM_access_op_get_access */ #define HVMOP_get_mem_access 13 -/* Get the specific access type for that region of memory */ -struct xen_hvm_get_mem_access { - /* Domain to be queried. */ - domid_t domid; - /* Memory type: OUT */ - uint16_t hvmmem_access; /* hvm_access_t */ - /* pfn, or ~0ull for default access for new pages. 
IN */ - uint64_aligned_t pfn; -}; -typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t); #define HVMOP_inject_trap 14 /* Inject a trap into a VCPU, which will get taken up on the next * scheduling of it. Note that the caller should know enough of the * state of the CPU before injecting, to know what the effect of * injecting the trap will be. */ struct xen_hvm_inject_trap { /* Domain to be queried. */ domid_t domid; /* VCPU */ uint32_t vcpuid; /* Vector number */ uint32_t vector; /* Trap type (HVMOP_TRAP_*) */ uint32_t type; /* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */ # define HVMOP_TRAP_ext_int 0 /* external interrupt */ # define HVMOP_TRAP_nmi 2 /* nmi */ # define HVMOP_TRAP_hw_exc 3 /* hardware exception */ # define HVMOP_TRAP_sw_int 4 /* software interrupt (CD nn) */ # define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */ # define HVMOP_TRAP_sw_exc 6 /* INT3 (CC), INTO (CE) */ /* Error code, or ~0u to skip */ uint32_t error_code; /* Intruction length */ uint32_t insn_len; /* CR2 for page faults */ uint64_aligned_t cr2; }; typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ #define HVMOP_get_mem_type 15 /* Return hvmmem_type_t for the specified pfn. */ struct xen_hvm_get_mem_type { /* Domain to be queried. */ domid_t domid; /* OUT variable. */ uint16_t mem_type; uint16_t pad[2]; /* align next field on 8-byte boundary */ /* IN variable. */ uint64_t pfn; }; typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); /* Following tools-only interfaces may change in future. 
*/ #if defined(__XEN__) || defined(__XEN_TOOLS__) /* MSI injection for emulated devices */ #define HVMOP_inject_msi 16 struct xen_hvm_inject_msi { /* Domain to be injected */ domid_t domid; /* Data -- lower 32 bits */ uint32_t data; /* Address (0xfeexxxxx) */ uint64_t addr; }; typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t); +/* + * IOREQ Servers + * + * The interface between an I/O emulator an Xen is called an IOREQ Server. + * A domain supports a single 'legacy' IOREQ Server which is instantiated if + * parameter... + * + * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the synchronous + * ioreq structures), or... + * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the buffered + * ioreq ring), or... + * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses + * to request buffered I/O emulation). + * + * The following hypercalls facilitate the creation of IOREQ Servers for + * 'secondary' emulators which are invoked to implement port I/O, memory, or + * PCI config space ranges which they explicitly register. + */ + +typedef uint16_t ioservid_t; + +/* + * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a secondary + * emulator servicing domain <domid>. + * + * The <id> handed back is unique for <domid>. If <handle_bufioreq> is zero + * the buffered ioreq ring will not be allocated and hence all emulation + * requestes to this server will be synchronous.
+ */ +#define HVMOP_create_ioreq_server 17 +struct xen_hvm_create_ioreq_server { + domid_t domid; /* IN - domain to be serviced */ +#define HVM_IOREQSRV_BUFIOREQ_OFF 0 +#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1 +/* + * Use this when read_pointer gets updated atomically and + * the pointer pair gets read atomically: + */ +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 + uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */ + ioservid_t id; /* OUT - server id */ +}; +typedef struct xen_hvm_create_ioreq_server xen_hvm_create_ioreq_server_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_create_ioreq_server_t); + +/* + * HVMOP_get_ioreq_server_info: Get all the information necessary to access + * IOREQ Server <id>. + * + * The emulator needs to map the synchronous ioreq structures and buffered + * ioreq ring (if it exists) that Xen uses to request emulation. These are + * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn> + * respectively. In addition, if the IOREQ Server is handling buffered + * emulation requests, the emulator needs to bind to event channel + * <bufioreq_port> to listen for them. (The event channels used for + * synchronous emulation requests are specified in the per-CPU ioreq + * structures in <ioreq_pfn>). + * If the IOREQ Server is not handling buffered emulation requests then the + * values handed back in <bufioreq_pfn> and <bufioreq_port> will both be 0.
+ */ +#define HVMOP_get_ioreq_server_info 18 +struct xen_hvm_get_ioreq_server_info { + domid_t domid; /* IN - domain to be serviced */ + ioservid_t id; /* IN - server id */ + evtchn_port_t bufioreq_port; /* OUT - buffered ioreq port */ + uint64_aligned_t ioreq_pfn; /* OUT - sync ioreq pfn */ + uint64_aligned_t bufioreq_pfn; /* OUT - buffered ioreq pfn */ +}; +typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_info_t); + +/* + * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain <domid> + * for emulation by the client of IOREQ + * Server <id> + * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of <domid> + * for emulation by the client of IOREQ + * Server <id> + * + * There are three types of I/O that can be emulated: port I/O, memory accesses + * and PCI config space accesses. The <type> field denotes which type of range + * the <start> and <end> (inclusive) fields are specifying. + * PCI config space ranges are specified by segment/bus/device/function values + * which should be encoded using the HVMOP_PCI_SBDF helper macro below. + * + * NOTE: unless an emulation request falls entirely within a range mapped + * by a secondary emulator, it will not be passed to that emulator.
+ */ +#define HVMOP_map_io_range_to_ioreq_server 19 +#define HVMOP_unmap_io_range_from_ioreq_server 20 +struct xen_hvm_io_range { + domid_t domid; /* IN - domain to be serviced */ + ioservid_t id; /* IN - server id */ + uint32_t type; /* IN - type of range */ +# define HVMOP_IO_RANGE_PORT 0 /* I/O port range */ +# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */ +# define HVMOP_IO_RANGE_PCI 2 /* PCI segment/bus/dev/func range */ + uint64_aligned_t start, end; /* IN - inclusive start and end of range */ +}; +typedef struct xen_hvm_io_range xen_hvm_io_range_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_io_range_t); + +#define HVMOP_PCI_SBDF(s,b,d,f) \ + ((((s) & 0xffff) << 16) | \ + (((b) & 0xff) << 8) | \ + (((d) & 0x1f) << 3) | \ + ((f) & 0x07)) + +/* + * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id> servicing domain + * <domid>. + * + * Any registered I/O ranges will be automatically deregistered. + */ +#define HVMOP_destroy_ioreq_server 21 +struct xen_hvm_destroy_ioreq_server { + domid_t domid; /* IN - domain to be serviced */ + ioservid_t id; /* IN - server id */ +}; +typedef struct xen_hvm_destroy_ioreq_server xen_hvm_destroy_ioreq_server_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_destroy_ioreq_server_t); + +/* + * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server <id> servicing + * domain <domid>. + * + * The IOREQ Server will not be passed any emulation requests until it is in the + * enabled state. + * Note that the contents of the ioreq_pfn and bufioreq_fn (see + * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server is in + * the enabled state. + */ +#define HVMOP_set_ioreq_server_state 22 +struct xen_hvm_set_ioreq_server_state { + domid_t domid; /* IN - domain to be serviced */ + ioservid_t id; /* IN - server id */ + uint8_t enabled; /* IN - enabled?
*/ +}; +typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t); + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ +#if defined(__i386__) || defined(__x86_64__) + +/* + * HVMOP_set_evtchn_upcall_vector: Set a <vector> that should be used for event + * channel upcalls on the specified <vcpu>. If set, + * this vector will be used in preference to the + * domain global callback via (see + * HVM_PARAM_CALLBACK_IRQ). + */ +#define HVMOP_set_evtchn_upcall_vector 23 +struct xen_hvm_evtchn_upcall_vector { + uint32_t vcpu; + uint8_t vector; +}; +typedef struct xen_hvm_evtchn_upcall_vector xen_hvm_evtchn_upcall_vector_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t); + +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#define HVMOP_guest_request_vm_event 24 + +/* HVMOP_altp2m: perform altp2m state operations */ +#define HVMOP_altp2m 25 + +#define HVMOP_ALTP2M_INTERFACE_VERSION 0x00000001 + +struct xen_hvm_altp2m_domain_state { + /* IN or OUT variable on/off */ + uint8_t state; +}; +typedef struct xen_hvm_altp2m_domain_state xen_hvm_altp2m_domain_state_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_domain_state_t); + +struct xen_hvm_altp2m_vcpu_enable_notify { + uint32_t vcpu_id; + uint32_t pad; + /* #VE info area gfn */ + uint64_t gfn; +}; +typedef struct xen_hvm_altp2m_vcpu_enable_notify xen_hvm_altp2m_vcpu_enable_notify_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_enable_notify_t); + +struct xen_hvm_altp2m_view { + /* IN/OUT variable */ + uint16_t view; + /* Create view only: default access type + * NOTE: currently ignored */ + uint16_t hvmmem_default_access; /* xenmem_access_t */ +}; +typedef struct xen_hvm_altp2m_view xen_hvm_altp2m_view_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_view_t); + +struct xen_hvm_altp2m_set_mem_access { + /* view */ + uint16_t view; + /* Memory type */ + uint16_t hvmmem_access; /* xenmem_access_t */ + uint32_t pad; + /* gfn */ + uint64_t gfn; +};
+typedef struct xen_hvm_altp2m_set_mem_access xen_hvm_altp2m_set_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_set_mem_access_t); + +struct xen_hvm_altp2m_change_gfn { + /* view */ + uint16_t view; + uint16_t pad1; + uint32_t pad2; + /* old gfn */ + uint64_t old_gfn; + /* new gfn, INVALID_GFN (~0UL) means revert */ + uint64_t new_gfn; +}; +typedef struct xen_hvm_altp2m_change_gfn xen_hvm_altp2m_change_gfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_change_gfn_t); + +struct xen_hvm_altp2m_op { + uint32_t version; /* HVMOP_ALTP2M_INTERFACE_VERSION */ + uint32_t cmd; +/* Get/set the altp2m state for a domain */ +#define HVMOP_altp2m_get_domain_state 1 +#define HVMOP_altp2m_set_domain_state 2 +/* Set the current VCPU to receive altp2m event notifications */ +#define HVMOP_altp2m_vcpu_enable_notify 3 +/* Create a new view */ +#define HVMOP_altp2m_create_p2m 4 +/* Destroy a view */ +#define HVMOP_altp2m_destroy_p2m 5 +/* Switch view for an entire domain */ +#define HVMOP_altp2m_switch_p2m 6 +/* Notify that a page of memory is to have specific access types */ +#define HVMOP_altp2m_set_mem_access 7 +/* Change a p2m entry to have a different gfn->mfn mapping */ +#define HVMOP_altp2m_change_gfn 8 + domid_t domain; + uint16_t pad1; + uint32_t pad2; + union { + struct xen_hvm_altp2m_domain_state domain_state; + struct xen_hvm_altp2m_vcpu_enable_notify enable_notify; + struct xen_hvm_altp2m_view view; + struct xen_hvm_altp2m_set_mem_access set_mem_access; + struct xen_hvm_altp2m_change_gfn change_gfn; + uint8_t pad[64]; + } u; +}; +typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t); + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Index: projects/clang370-import/sys/xen/interface/hvm/hvm_xs_strings.h =================================================================== --- 
projects/clang370-import/sys/xen/interface/hvm/hvm_xs_strings.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/hvm/hvm_xs_strings.h (revision 288926) @@ -0,0 +1,82 @@ +/****************************************************************************** + * hvm/hvm_xs_strings.h + * + * HVM xenstore strings used in HVMLOADER. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2013, Citrix Systems + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ +#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ + +#define HVM_XS_HVMLOADER "hvmloader" +#define HVM_XS_BIOS "hvmloader/bios" +#define HVM_XS_GENERATION_ID_ADDRESS "hvmloader/generation-id-address" +#define HVM_XS_ALLOW_MEMORY_RELOCATE "hvmloader/allow-memory-relocate" + +/* The following values allow additional ACPI tables to be added to the + * virtual ACPI BIOS that hvmloader constructs. 
The values specify the guest + * physical address and length of a block of ACPI tables to add. The format of + * the block is simply concatenated raw tables (which specify their own length + * in the ACPI header). + */ +#define HVM_XS_ACPI_PT_ADDRESS "hvmloader/acpi/address" +#define HVM_XS_ACPI_PT_LENGTH "hvmloader/acpi/length" + +/* Any number of SMBIOS types can be passed through to an HVM guest using + * the following xenstore values. The values specify the guest physical + * address and length of a block of SMBIOS structures for hvmloader to use. + * The block is formatted in the following way: + * + * <length><struct><length><struct>... + * + * Each length separator is a 32b integer indicating the length of the next + * SMBIOS structure. For DMTF defined types (0 - 121), the passed in struct + * will replace the default structure in hvmloader. In addition, any + * OEM/vendortypes (128 - 255) will all be added. + */ +#define HVM_XS_SMBIOS_PT_ADDRESS "hvmloader/smbios/address" +#define HVM_XS_SMBIOS_PT_LENGTH "hvmloader/smbios/length" + +/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */ +#define HVM_XS_SMBIOS_DEFAULT_BATTERY "hvmloader/smbios/default_battery" + +/* The following xenstore values are used to override some of the default + * string values in the SMBIOS table constructed in hvmloader.
+ */ +#define HVM_XS_BIOS_STRINGS "bios-strings" +#define HVM_XS_BIOS_VENDOR "bios-strings/bios-vendor" +#define HVM_XS_BIOS_VERSION "bios-strings/bios-version" +#define HVM_XS_SYSTEM_MANUFACTURER "bios-strings/system-manufacturer" +#define HVM_XS_SYSTEM_PRODUCT_NAME "bios-strings/system-product-name" +#define HVM_XS_SYSTEM_VERSION "bios-strings/system-version" +#define HVM_XS_SYSTEM_SERIAL_NUMBER "bios-strings/system-serial-number" +#define HVM_XS_ENCLOSURE_MANUFACTURER "bios-strings/enclosure-manufacturer" +#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number" +#define HVM_XS_BATTERY_MANUFACTURER "bios-strings/battery-manufacturer" +#define HVM_XS_BATTERY_DEVICE_NAME "bios-strings/battery-device-name" + +/* 1 to 99 OEM strings can be set in xenstore using values of the form + * below. These strings will be loaded into the SMBIOS type 11 structure. + */ +#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d" + +#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */ Property changes on: projects/clang370-import/sys/xen/interface/hvm/hvm_xs_strings.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/hvm/ioreq.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/ioreq.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/hvm/ioreq.h (revision 288926) @@ -1,140 +1,138 @@ /* * ioreq.h: I/O request definitions for device models * Copyright (c) 2004, Intel Corporation. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef _IOREQ_H_ #define _IOREQ_H_ #define IOREQ_READ 1 #define IOREQ_WRITE 0 #define STATE_IOREQ_NONE 0 #define STATE_IOREQ_READY 1 #define STATE_IOREQ_INPROCESS 2 #define STATE_IORESP_READY 3 #define IOREQ_TYPE_PIO 0 /* pio */ #define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_PCI_CONFIG 2 #define IOREQ_TYPE_TIMEOFFSET 7 #define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ /* * VMExit dispatcher should cooperate with instruction decoder to * prepare this structure and notify service OS and DM by sending - * virq + * virq. 
+ * + * For I/O type IOREQ_TYPE_PCI_CONFIG, the physical address is formatted + * as follows: + * + * 63....48|47..40|39..35|34..32|31........0 + * SEGMENT |BUS |DEV |FN |OFFSET */ struct ioreq { uint64_t addr; /* physical address */ uint64_t data; /* data (or paddr of data) */ uint32_t count; /* for rep prefixes */ uint32_t size; /* size in bytes */ uint32_t vp_eport; /* evtchn for notifications to/from device model */ uint16_t _pad0; uint8_t state:4; uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr * of the real data to use. */ uint8_t dir:1; /* 1=read, 0=write */ uint8_t df:1; uint8_t _pad1:1; uint8_t type; /* I/O type */ }; typedef struct ioreq ioreq_t; struct shared_iopage { struct ioreq vcpu_ioreq[1]; }; typedef struct shared_iopage shared_iopage_t; struct buf_ioreq { uint8_t type; /* I/O type */ uint8_t pad:1; uint8_t dir:1; /* 1=read, 0=write */ uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ uint32_t addr:20;/* physical address */ uint32_t data; /* data */ }; typedef struct buf_ioreq buf_ioreq_t; #define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ struct buffered_iopage { - unsigned int read_pointer; - unsigned int write_pointer; +#ifdef __XEN__ + union bufioreq_pointers { + struct { +#endif + uint32_t read_pointer; + uint32_t write_pointer; +#ifdef __XEN__ + }; + uint64_t full; + } ptrs; +#endif buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; }; /* NB. Size of this structure must be no greater than one page. 
*/ typedef struct buffered_iopage buffered_iopage_t; -#if defined(__ia64__) -struct pio_buffer { - uint32_t page_offset; - uint32_t pointer; - uint32_t data_end; - uint32_t buf_size; - void *opaque; -}; - -#define PIO_BUFFER_IDE_PRIMARY 0 /* I/O port = 0x1F0 */ -#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */ -#define PIO_BUFFER_ENTRY_NUM 2 -struct buffered_piopage { - struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM]; - uint8_t buffer[1]; -}; -#endif /* defined(__ia64__) */ - /* * ACPI Control/Event register locations. Location is controlled by a * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. */ /* Version 0 (default): Traditional Xen locations. */ #define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 #define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) #define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) #define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) #define ACPI_GPE0_BLK_LEN_V0 0x08 /* Version 1: Locations preferred by modern Qemu. */ #define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 #define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) #define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) #define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 #define ACPI_GPE0_BLK_LEN_V1 0x04 /* Compatibility definitions for the default location (version 0). 
*/ #define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 #define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 #define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 #define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 #define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 #endif /* _IOREQ_H_ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/hvm/params.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/params.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/hvm/params.h (revision 288926) @@ -1,152 +1,197 @@ /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2007, Keir Fraser */ #ifndef __XEN_PUBLIC_HVM_PARAMS_H__ #define __XEN_PUBLIC_HVM_PARAMS_H__ #include "hvm_op.h" /* * Parameter space for HVMOP_{set,get}_param. */ /* * How should CPU0 event-channel notifications be delivered? * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: * Domain = val[47:32], Bus = val[31:16], * DevFn = val[15: 8], IntX = val[ 1: 0] * val[63:56] == 2: val[7:0] is a vector number, check for * XENFEAT_hvm_callback_vector to know if this delivery * method is available. * If val == 0 then CPU0 event-channel notifications are not delivered. */ #define HVM_PARAM_CALLBACK_IRQ 0 /* * These are not used by Xen. They are here for convenience of HVM-guest * xenbus implementations. */ #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 #define HVM_PARAM_PAE_ENABLED 4 #define HVM_PARAM_IOREQ_PFN 5 #define HVM_PARAM_BUFIOREQ_PFN 6 #define HVM_PARAM_BUFIOREQ_EVTCHN 26 -#ifdef __ia64__ +#if defined(__i386__) || defined(__x86_64__) -#define HVM_PARAM_NVRAM_FD 7 -#define HVM_PARAM_VHPT_SIZE 8 -#define HVM_PARAM_BUFPIOREQ_PFN 9 +/* + * Viridian enlightenments + * + * (See http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx) + * + * To expose viridian enlightenments to the guest set this parameter + * to the desired feature mask. The base feature set must be present + * in any valid feature mask. 
+ */ +#define HVM_PARAM_VIRIDIAN 9 -#elif defined(__i386__) || defined(__x86_64__) +/* Base+Freq viridian feature sets: + * + * - Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) + * - APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) + * - Virtual Processor index MSR (HV_X64_MSR_VP_INDEX) + * - Timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY) + */ +#define _HVMPV_base_freq 0 +#define HVMPV_base_freq (1 << _HVMPV_base_freq) -/* Expose Viridian interfaces to this HVM guest? */ -#define HVM_PARAM_VIRIDIAN 9 +/* Feature set modifications */ +/* Disable timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY). + * This modification restores the viridian feature set to the + * original 'base' set exposed in releases prior to Xen 4.4. + */ +#define _HVMPV_no_freq 1 +#define HVMPV_no_freq (1 << _HVMPV_no_freq) + +/* Enable Partition Time Reference Counter (HV_X64_MSR_TIME_REF_COUNT) */ +#define _HVMPV_time_ref_count 2 +#define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count) + +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */ +#define _HVMPV_reference_tsc 3 +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) + +#define HVMPV_feature_mask \ + (HVMPV_base_freq | \ + HVMPV_no_freq | \ + HVMPV_time_ref_count | \ + HVMPV_reference_tsc) + #endif /* * Set mode for virtual timers (currently x86 only): * delay_for_missed_ticks (default): * Do not advance a vcpu's time beyond the correct delivery time for * interrupts that have been missed due to preemption. Deliver missed * interrupts when the vcpu is rescheduled and advance the vcpu's virtual * time stepwise for each one. * no_delay_for_missed_ticks: * As above, missed interrupts are delivered, but guest time always tracks * wallclock (i.e., real) time while doing so. * no_missed_ticks_pending: * No missed interrupts are held pending. 
Instead, to ensure ticks are * delivered at some non-zero rate, if we detect missed ticks then the * internal tick alarm is not disabled if the VCPU is preempted during the * next tick period. * one_missed_tick_pending: * Missed interrupts are collapsed together and delivered as one 'late tick'. * Guest time always tracks wallclock (i.e., real) time. */ #define HVM_PARAM_TIMER_MODE 10 #define HVMPTM_delay_for_missed_ticks 0 #define HVMPTM_no_delay_for_missed_ticks 1 #define HVMPTM_no_missed_ticks_pending 2 #define HVMPTM_one_missed_tick_pending 3 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ #define HVM_PARAM_HPET_ENABLED 11 /* Identity-map page directory used by Intel EPT when CR0.PG=0. */ #define HVM_PARAM_IDENT_PT 12 /* Device Model domain, defaults to 0. */ #define HVM_PARAM_DM_DOMAIN 13 /* ACPI S state: currently support S0 and S3 on x86. */ #define HVM_PARAM_ACPI_S_STATE 14 /* TSS used on Intel when CR0.PE=0. */ #define HVM_PARAM_VM86_TSS 15 /* Boolean: Enable aligning all periodic vpts to reduce interrupts */ #define HVM_PARAM_VPT_ALIGN 16 /* Console debug shared memory ring and event channel */ #define HVM_PARAM_CONSOLE_PFN 17 #define HVM_PARAM_CONSOLE_EVTCHN 18 /* * Select location of ACPI PM1a and TMR control blocks. 
Currently two locations * are supported, specified by version 0 or 1 in this parameter: * - 0: default, use the old addresses * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48 * - 1: use the new default qemu addresses * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008 * You can find these address definitions in <hvm/ioreq.h> */ #define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 -/* Enable blocking memory events, async or sync (pause vcpu until response) - * onchangeonly indicates messages only on a change of value */ +/* Deprecated */ #define HVM_PARAM_MEMORY_EVENT_CR0 20 #define HVM_PARAM_MEMORY_EVENT_CR3 21 #define HVM_PARAM_MEMORY_EVENT_CR4 22 #define HVM_PARAM_MEMORY_EVENT_INT3 23 #define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 +#define HVM_PARAM_MEMORY_EVENT_MSR 30 -#define HVMPME_MODE_MASK (3 << 0) -#define HVMPME_mode_disabled 0 -#define HVMPME_mode_async 1 -#define HVMPME_mode_sync 2 -#define HVMPME_onchangeonly (1 << 2) - /* Boolean: Enable nestedhvm (hvm only) */ #define HVM_PARAM_NESTEDHVM 24 /* Params for the mem event rings */ #define HVM_PARAM_PAGING_RING_PFN 27 -#define HVM_PARAM_ACCESS_RING_PFN 28 +#define HVM_PARAM_MONITOR_RING_PFN 28 #define HVM_PARAM_SHARING_RING_PFN 29 -#define HVM_NR_PARAMS 30 +/* SHUTDOWN_* action in case of a triple fault */ +#define HVM_PARAM_TRIPLE_FAULT_REASON 31 + +#define HVM_PARAM_IOREQ_SERVER_PFN 32 +#define HVM_PARAM_NR_IOREQ_SERVER_PAGES 33 + +/* Location of the VM Generation ID in guest physical address space.
*/ +#define HVM_PARAM_VM_GENERATION_ID_ADDR 34 + +/* Boolean: Enable altp2m */ +#define HVM_PARAM_ALTP2M 35 + +#define HVM_NR_PARAMS 36 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ Index: projects/clang370-import/sys/xen/interface/hvm/pvdrivers.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/pvdrivers.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/hvm/pvdrivers.h (revision 288926) @@ -0,0 +1,49 @@ +/* + * pvdrivers.h: Register of PV drivers product numbers. + * Copyright (c) 2012, Citrix Systems Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_PVDRIVERS_H_ +#define _XEN_PUBLIC_PVDRIVERS_H_ + +/* + * This is the master registry of product numbers for + * PV drivers. + * If you need a new product number allocating, please + * post to xen-devel@lists.xensource.com. You should NOT use + * a product number without allocating one. 
+ * If you maintain a separate versioning and distribution path + * for PV drivers you should have a separate product number so + * that your drivers can be separated from others. + * + * During development, you may use the product ID to + * indicate a driver which is yet to be released. + */ + +#define PVDRIVERS_PRODUCT_LIST(EACH) \ + EACH("xensource-windows", 0x0001) /* Citrix */ \ + EACH("gplpv-windows", 0x0002) /* James Harper */ \ + EACH("linux", 0x0003) \ + EACH("xenserver-windows-v7.0+", 0x0004) /* Citrix */ \ + EACH("xenserver-windows-v7.2+", 0x0005) /* Citrix */ \ + EACH("experimental", 0xffff) + +#endif /* _XEN_PUBLIC_PVDRIVERS_H_ */ Property changes on: projects/clang370-import/sys/xen/interface/hvm/pvdrivers.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/hvm/save.h =================================================================== --- projects/clang370-import/sys/xen/interface/hvm/save.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/hvm/save.h (revision 288926) @@ -1,113 +1,111 @@ /* * hvm/save.h * * Structure definitions for HVM state that is held by Xen and must * be saved along with the domain's memory and device-model state. * * Copyright (c) 2007 XenSource Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_HVM_SAVE_H__ #define __XEN_PUBLIC_HVM_SAVE_H__ /* * Structures in this header *must* have the same layout in 32bit * and 64bit environments: this means that all fields must be explicitly * sized types and aligned to their sizes, and the structs must be * a multiple of eight bytes long. * * Only the state necessary for saving and restoring (i.e. fields * that are analogous to actual hardware state) should go in this file. * Internal mechanisms should be kept in Xen-private headers. */ #if !defined(__GNUC__) || defined(__STRICT_ANSI__) #error "Anonymous structs/unions are a GNU extension." #endif /* * Each entry is preceded by a descriptor giving its type and length */ struct hvm_save_descriptor { uint16_t typecode; /* Used to demux the various types below */ uint16_t instance; /* Further demux within a type */ uint32_t length; /* In bytes, *not* including this descriptor */ }; /* * Each entry has a datatype associated with it: for example, the CPU state * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system * ugliness. 
*/ #ifdef __XEN__ # define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \ struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; } # include /* BUG() */ # define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \ struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; } #else # define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];} # define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];} #endif #define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) #define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) #define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) #ifdef __XEN__ # define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t) # define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x))) # define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1) # define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst) #endif /* * The series of save records is teminated by a zero-type, zero-length * descriptor. 
*/ struct hvm_save_end {}; DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); #if defined(__i386__) || defined(__x86_64__) #include "../arch-x86/hvm/save.h" -#elif defined(__ia64__) -#include "../arch-ia64/hvm/save.h" -#elif defined(__arm__) +#elif defined(__arm__) || defined(__aarch64__) #include "../arch-arm/hvm/save.h" #else #error "unsupported architecture" #endif #endif /* __XEN_PUBLIC_HVM_SAVE_H__ */ Index: projects/clang370-import/sys/xen/interface/io/blkif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/blkif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/blkif.h (revision 288926) @@ -1,641 +1,646 @@ /****************************************************************************** * blkif.h * * Unified block-device I/O interface for Xen guest OSes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* * Copyright (c) 2003-2004, Keir Fraser * Copyright (c) 2012, Spectra Logic Corporation */ #ifndef __XEN_PUBLIC_IO_BLKIF_H__ #define __XEN_PUBLIC_IO_BLKIF_H__ #include "ring.h" #include "../grant_table.h" /* * Front->back notifications: When enqueuing a new request, sending a * notification can be made conditional on req_event (i.e., the generic * hold-off mechanism provided by the ring macros). Backends must set * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). * * Back->front notifications: When enqueuing a new response, sending a * notification can be made conditional on rsp_event (i.e., the generic * hold-off mechanism provided by the ring macros). Frontends must set * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). */ #ifndef blkif_vdev_t #define blkif_vdev_t uint16_t #endif #define blkif_sector_t uint64_t /* * Feature and Parameter Negotiation * ================================= * The two halves of a Xen block driver utilize nodes within the XenStore to * communicate capabilities and to negotiate operating parameters. This * section enumerates these nodes which reside in the respective front and * backend portions of the XenStore, following the XenBus convention. * * All data in the XenStore is stored as strings. Nodes specifying numeric * values are encoded in decimal. Integer value ranges listed below are * expressed as fixed sized integer types capable of storing the conversion - * of a properly formatted node string, without loss of information. + * of a properly formated node string, without loss of information. * * Any specified default value is in effect if the corresponding XenBus node * is not present in the XenStore. * * XenStore nodes in sections marked "PRIVATE" are solely for use by the * driver side whose XenBus tree contains them. * * XenStore nodes marked "DEPRECATED" in their notes section should only be * used to provide interoperability with legacy implementations. 
* * See the XenBus state transition diagram below for details on when XenBus * nodes must be published and when they can be queried. * ***************************************************************************** * Backend XenBus Nodes ***************************************************************************** * *------------------ Backend Device Identification (PRIVATE) ------------------ * * mode * Values: "r" (read only), "w" (writable) * * The read or write access permissions to the backing store to be * granted to the frontend. * * params * Values: string * - * Data used by the backend driver to locate and configure the backing - * device. The format and semantics of this data vary according to the - * backing device in use and are outside the scope of this specification. + * A free formatted string providing sufficient information for the + * backend driver to open the backing device. (e.g. the path to the + * file or block device representing the backing store.) * + * physical-device + * Values: "MAJOR:MINOR" + * + * MAJOR and MINOR are the major number and minor number of the + * backing device respectively. + * * type * Values: "file", "phy", "tap" * * The type of the backing device/object. * * * direct-io-safe * Values: 0/1 (boolean) * Default Value: 0 * * The underlying storage is not affected by the direct IO memory * lifetime bug. See: * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html * * Therefore this option gives the backend permission to use * O_DIRECT, notwithstanding that bug. * * That is, if this option is enabled, use of O_DIRECT is safe, * in circumstances where we would normally have avoided it as a * workaround for that bug. This option is not relevant for all * backends, and even not necessarily supported for those for * which it is relevant. A backend which knows that it is not * affected by the bug can ignore this option. 
* * This option doesn't require a backend to use O_DIRECT, so it * should not be used to try to control the caching behaviour. * *--------------------------------- Features --------------------------------- * * feature-barrier * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process requests * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * * feature-flush-cache * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process requests * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * * feature-discard * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process requests * containing the BLKIF_OP_DISCARD request opcode. Requests * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * * feature-persistent * Values: 0/1 (boolean) * Default Value: 0 * Notes: 7 * * A value of "1" indicates that the backend can keep the grants used * by the frontend driver mapped, so the same set of grants should be * used in all transactions. The maximum number of grants the backend * can map persistently depends on the implementation, but ideally it * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this * feature the backend doesn't need to unmap each grant, preventing * costly TLB flushes. The backend driver should only map grants * persistently if the frontend supports it. If a backend driver chooses * to use the persistent protocol when the frontend doesn't support it, * it will probably hit the maximum number of persistently mapped grants * (due to the fact that the frontend won't be reusing the same grants), * and fall back to non-persistent mode. 
Backend implementations may * shrink or expand the number of persistently mapped grants without * notifying the frontend depending on memory constraints (this might * cause a performance degradation). * * If a backend driver wants to limit the maximum number of persistently * mapped grants to a value less than RING_SIZE * * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to * discard the grants that are less commonly used. Using a LRU in the * backend driver paired with a LIFO queue in the frontend will * allow us to have better performance in this scenario. * *----------------------- Request Transport Parameters ------------------------ * * max-ring-page-order * Values: <uint32_t> * Default Value: 0 * Notes: 1, 3 * * The maximum supported size of the request ring buffer in units of * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, * etc.). * * max-ring-pages * Values: <uint32_t> * Default Value: 1 * Notes: DEPRECATED, 2, 3 * * The maximum supported size of the request ring buffer in units of * machine pages. The value must be a power of 2. * *------------------------- Backend Device Properties ------------------------- * * discard-enable * Values: 0/1 (boolean) * Default Value: 1 * * This optional property, set by the toolstack, instructs the backend * to offer discard to the frontend. If the property is missing the * backend should offer discard if the backing storage actually supports * it. This optional property, set by the toolstack, requests that the * backend offer, or not offer, discard to the frontend. * * discard-alignment * Values: <uint32_t> * Default Value: 0 * Notes: 4, 5 * * The offset, in bytes from the beginning of the virtual block device, * to the first, addressable, discard extent on the underlying device. * * discard-granularity * Values: <uint32_t> * Default Value: <"sector-size"> * Notes: 4 * * The size, in bytes, of the individually addressable discard extents * of the underlying device.
* * discard-secure * Values: 0/1 (boolean) * Default Value: 0 * Notes: 10 * * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD * requests with the BLKIF_DISCARD_SECURE flag set. * * info * Values: <uint32_t> (bitmap) * * A collection of bit flags describing attributes of the backing * device. The VDISK_* macros define the meaning of each bit * location. * * sector-size * Values: <uint32_t> * * The logical sector size, in bytes, of the backend device. * * physical-sector-size * Values: <uint32_t> * * The physical sector size, in bytes, of the backend device. * * sectors * Values: <uint64_t> * * The size of the backend device, expressed in units of its logical * sector size ("sector-size"). * ***************************************************************************** * Frontend XenBus Nodes ***************************************************************************** * *----------------------- Request Transport Parameters ----------------------- * * event-channel * Values: <uint32_t> * * The identifier of the Xen event channel used to signal activity * in the ring buffer. * * ring-ref * Values: <uint32_t> * Notes: 6 * * The Xen grant reference granting permission for the backend to map * the sole page in a single page sized ring buffer. * * ring-ref%u * Values: <uint32_t> * Notes: 6 * * For a frontend providing a multi-page ring, a "number of ring pages" * sized list of nodes, each containing a Xen grant reference granting * permission for the backend to map the page of the ring located * at page index "%u". Page indexes are zero based. * * protocol * Values: string (XEN_IO_PROTO_ABI_*) * Default Value: XEN_IO_PROTO_ABI_NATIVE * * The machine ABI rules governing the format of all ring request and * response structures. * * ring-page-order * Values: <uint32_t> * Default Value: 0 * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) * Notes: 1, 3 * * The size of the frontend allocated request ring buffer in units * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, * etc.).
* * num-ring-pages * Values: <uint32_t> * Default Value: 1 * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) * Notes: DEPRECATED, 2, 3 * * The size of the frontend allocated request ring buffer in units of * machine pages. The value must be a power of 2. * * feature-persistent * Values: 0/1 (boolean) * Default Value: 0 * Notes: 7, 8, 9 * * A value of "1" indicates that the frontend will reuse the same grants * for all transactions, allowing the backend to map them with write * access (even when it should be read-only). If the frontend hits the * maximum number of allowed persistently mapped grants, it can fall back * to non persistent mode. This will cause a performance degradation, * since the backend driver will still try to map those grants * persistently. Since the persistent grants protocol is compatible with * the previous protocol, a frontend driver can choose to work in * persistent mode even when the backend doesn't support it. * * It is recommended that the frontend driver stores the persistently * mapped grants in a LIFO queue, so a subset of all persistently mapped * grants gets used commonly. This is done in case the backend driver * decides to limit the maximum number of persistently mapped grants * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. * *------------------------- Virtual Device Properties ------------------------- * * device-type * Values: "disk", "cdrom", "floppy", etc. * * virtual-device * Values: <uint32_t> * * A value indicating the physical device to virtualize within the * frontend's domain. (e.g. "The first ATA disk", "The third SCSI * disk", etc.) * * See docs/misc/vbd-interface.txt for details on the format of this * value. * * Notes * ----- * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer * PV drivers. * (2) Multi-page ring buffer scheme first used in some RedHat distributions * including a distribution deployed on certain nodes of the Amazon * EC2 cluster.
* (3) Support for multi-page ring buffers was implemented independently, * in slightly different forms, by both Citrix and RedHat/Amazon. * For full interoperability, block front and backends should publish * identical ring parameters, adjusted for unit differences, to the * XenStore nodes used in both schemes. * (4) Devices that support discard functionality may internally allocate space * (discardable extents) in units that are larger than the exported logical * block size. If the backing device has such discardable extents the * backend should provide both discard-granularity and discard-alignment. * Providing just one of the two may be considered an error by the frontend. * Backends supporting discard should include discard-granularity and * discard-alignment even if it supports discarding individual sectors. * Frontends should assume discard-alignment == 0 and discard-granularity * == sector size if these keys are missing. * (5) The discard-alignment parameter allows a physical device to be * partitioned into virtual devices that do not necessarily begin or * end on a discardable extent boundary. * (6) When there is only a single page allocated to the request ring, * 'ring-ref' is used to communicate the grant reference for this * page to the backend. When using a multi-page ring, the 'ring-ref' * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. * (7) When using persistent grants data has to be copied from/to the page * where the grant is currently mapped. The overhead of doing this copy * however doesn't suppress the speed improvement of not having to unmap * the grants. * (8) The frontend driver has to allow the backend driver to map all grants * with write access, even when they should be mapped read-only, since * further requests may reuse these grants and require write permissions. 
* (9) Linux implementation doesn't have a limit on the maximum number of * grants that can be persistently mapped in the frontend driver, but * due to the frontend driver implementation it should never be bigger * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. *(10) The discard-secure property may be present and will be set to 1 if the * backing device supports secure discard. */ /* * STATE DIAGRAMS * ***************************************************************************** * Startup * ***************************************************************************** * * Tool stack creates front and back nodes with state XenbusStateInitialising. * * Front Back * ================================= ===================================== * XenbusStateInitialising XenbusStateInitialising * o Query virtual device o Query backend device identification * properties. data. * o Setup OS device instance. o Open and validate backend device. * o Publish backend features and * transport parameters. * | * | * V * XenbusStateInitWait * * o Query backend features and * transport parameters. * o Allocate and initialize the * request ring. * o Publish transport parameters * that will be in effect during * this connection. * | * | * V * XenbusStateInitialised * * o Query frontend transport parameters. * o Connect to the request ring and * event channel. * o Publish backend device properties. * | * | * V * XenbusStateConnected * * o Query backend device properties. * o Finalize OS virtual device * instance. * | * | * V * XenbusStateConnected * * Note: Drivers that do not support any optional features, or the negotiation * of transport parameters, can skip certain states in the state machine: * * o A frontend may transition to XenbusStateInitialised without * waiting for the backend to enter XenbusStateInitWait. In this * case, default transport parameters are in effect and any * transport parameters published by the frontend must contain * their default values.
* * o A backend may transition to XenbusStateInitialised, bypassing * XenbusStateInitWait, without waiting for the frontend to first * enter the XenbusStateInitialised state. In this case, default * transport parameters are in effect and any transport parameters * published by the backend must contain their default values. * * Drivers that support optional features and/or transport parameter * negotiation must tolerate these additional state transition paths. * In general this means performing the work of any skipped state * transition, if it has not already been performed, in addition to the * work associated with entry into the current state. */ /* * REQUEST CODES. */ #define BLKIF_OP_READ 0 #define BLKIF_OP_WRITE 1 /* * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER * operation code ("barrier request") must be completed prior to the * execution of the barrier request. All writes issued after the barrier * request must not execute until after the completion of the barrier request. * * Optional. See "feature-barrier" XenBus node documentation above. */ #define BLKIF_OP_WRITE_BARRIER 2 /* * Commit any uncommitted contents of the backing device's volatile cache * to stable storage. * * Optional. See "feature-flush-cache" XenBus node documentation above. */ #define BLKIF_OP_FLUSH_DISKCACHE 3 /* * Used in SLES sources for device specific command packet * contained within the request. Reserved for that purpose. */ #define BLKIF_OP_RESERVED_1 4 /* * Indicate to the backend device that a region of storage is no longer in * use, and may be discarded at any time without impact to the client. If * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the * discarded region on the device must be rendered unrecoverable before the * command returns. * - * This operation is analogous to performing a trim (ATA) or unmap (SCSI), + * This operation is analogous to performing a trim (ATA) or unamp (SCSI), * command on a native device. 
* * More information about trim/unmap operations can be found at: * http://t13.org/Documents/UploadedDocuments/docs2008/ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc * http://www.seagate.com/staticfiles/support/disc/manuals/ * Interface%20manuals/100293068c.pdf * * Optional. See "feature-discard", "discard-alignment", * "discard-granularity", and "discard-secure" in the XenBus node * documentation above. */ #define BLKIF_OP_DISCARD 5 /* * Recognized if "feature-max-indirect-segments" is present in the backend * xenbus info. The "feature-max-indirect-segments" node contains the maximum * number of segments allowed by the backend per request. If the node is * present, the frontend might use blkif_request_indirect structs in order to * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The * maximum number of indirect segments is fixed by the backend, but the * frontend can issue requests with any number of indirect segments as long as * it's less than the number provided by the backend. The indirect_grefs field * in blkif_request_indirect should be filled by the frontend with the * grant references of the pages that are holding the indirect segments. * These pages are filled with an array of blkif_request_segment that hold the * information about the segments. The number of indirect pages to use is * determined by the number of segments an indirect request contains. Every * indirect page can contain a maximum of * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to * calculate the number of indirect pages to use we have to do * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). * * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* * create the "feature-max-indirect-segments" node! */ #define BLKIF_OP_INDIRECT 6 /* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. * NB.
This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 /* * Maximum number of indirect pages to use per request. */ #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 /* * NB. first_sect and last_sect in blkif_request_segment, as well as * sector_number in blkif_request, are always expressed in 512-byte units. * However they must be properly aligned to the real sector size of the * physical disk, which is reported in the "physical-sector-size" node in * the backend xenbus info. Also the xenbus "sectors" node is expressed in * 512-byte units. */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ /* @first_sect: first sector in frame to transfer (inclusive). */ /* @last_sect: last sector in frame to transfer (inclusive). */ uint8_t first_sect, last_sect; }; -typedef struct blkif_request_segment blkif_request_segment_t; /* * Starting ring element for any I/O request. */ struct blkif_request { uint8_t operation; /* BLKIF_OP_??? 
*/ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - blkif_request_segment_t seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; typedef struct blkif_request blkif_request_t; /* * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) */ struct blkif_request_discard { uint8_t operation; /* BLKIF_OP_DISCARD */ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ blkif_vdev_t handle; /* same as for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk */ uint64_t nr_sectors; /* number of contiguous sectors to discard*/ }; typedef struct blkif_request_discard blkif_request_discard_t; struct blkif_request_indirect { uint8_t operation; /* BLKIF_OP_INDIRECT */ uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ uint16_t nr_segments; /* number of segments */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ blkif_vdev_t handle; /* same as for read/write requests */ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; #ifdef __i386__ uint64_t pad; /* Make it 64 byte aligned on i386 */ #endif }; typedef struct blkif_request_indirect blkif_request_indirect_t; struct blkif_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; typedef struct blkif_response blkif_response_t; /* * STATUS RETURN CODES. */ /* Operation not supported (only happens on barrier writes). */ #define BLKIF_RSP_EOPNOTSUPP -2 /* Operation failed for some unspecified reason (-EIO). 
*/ #define BLKIF_RSP_ERROR -1 /* Operation completed successfully. */ #define BLKIF_RSP_OKAY 0 /* * Generate blkif ring structures and types. */ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 #define VDISK_READONLY 0x4 #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ /* * Local variables: * mode: C * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/console.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/console.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/console.h (revision 288926) @@ -1,51 +1,51 @@ /****************************************************************************** * console.h * * Console I/O interface for Xen guest OSes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_IO_CONSOLE_H__ #define __XEN_PUBLIC_IO_CONSOLE_H__ typedef uint32_t XENCONS_RING_IDX; #define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) struct xencons_interface { char in[1024]; char out[2048]; XENCONS_RING_IDX in_cons, in_prod; XENCONS_RING_IDX out_cons, out_prod; }; #endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/fbif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/fbif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/fbif.h (revision 288926) @@ -1,176 +1,176 @@ /* * fbif.h -- Xen virtual frame buffer device * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* * Copyright (C) 2005 Anthony Liguori * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster */ #ifndef __XEN_PUBLIC_IO_FBIF_H__ #define __XEN_PUBLIC_IO_FBIF_H__ /* Out events (frontend -> backend) */ /* * Out events may be sent only when requested by backend, and receipt * of an unknown out event is an error. */ /* Event type 1 currently not used */ /* * Framebuffer update notification event * Capable frontend sets feature-update in xenstore. * Backend requests it by setting request-update in xenstore. */ #define XENFB_TYPE_UPDATE 2 struct xenfb_update { uint8_t type; /* XENFB_TYPE_UPDATE */ int32_t x; /* source x */ int32_t y; /* source y */ int32_t width; /* rect width */ int32_t height; /* rect height */ }; /* * Framebuffer resize notification event * Capable backend sets feature-resize in xenstore. */ #define XENFB_TYPE_RESIZE 3 struct xenfb_resize { uint8_t type; /* XENFB_TYPE_RESIZE */ int32_t width; /* width in pixels */ int32_t height; /* height in pixels */ int32_t stride; /* stride in bytes */ int32_t depth; /* depth in bits */ int32_t offset; /* offset of the framebuffer in bytes */ }; #define XENFB_OUT_EVENT_SIZE 40 union xenfb_out_event { uint8_t type; struct xenfb_update update; struct xenfb_resize resize; char pad[XENFB_OUT_EVENT_SIZE]; }; /* In events (backend -> frontend) */ /* * Frontends should ignore unknown in events. */ /* * Framebuffer refresh period advice * Backend sends it to advise the frontend their preferred period of * refresh. Frontends that keep the framebuffer constantly up-to-date * just ignore it. Frontends that use the advice should immediately * refresh the framebuffer (and send an update notification event if * those have been requested), then use the update frequency to guide * their periodical refreshs. 
*/ #define XENFB_TYPE_REFRESH_PERIOD 1 #define XENFB_NO_REFRESH 0 struct xenfb_refresh_period { uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ uint32_t period; /* period of refresh, in ms, * XENFB_NO_REFRESH if no refresh is needed */ }; #define XENFB_IN_EVENT_SIZE 40 union xenfb_in_event { uint8_t type; struct xenfb_refresh_period refresh_period; char pad[XENFB_IN_EVENT_SIZE]; }; /* shared page */ #define XENFB_IN_RING_SIZE 1024 #define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) #define XENFB_IN_RING_OFFS 1024 #define XENFB_IN_RING(page) \ ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) #define XENFB_IN_RING_REF(page, idx) \ (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) #define XENFB_OUT_RING_SIZE 2048 #define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) #define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) #define XENFB_OUT_RING(page) \ ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) #define XENFB_OUT_RING_REF(page, idx) \ (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) struct xenfb_page { uint32_t in_cons, in_prod; uint32_t out_cons, out_prod; int32_t width; /* the width of the framebuffer (in pixels) */ int32_t height; /* the height of the framebuffer (in pixels) */ uint32_t line_length; /* the length of a row of pixels (in bytes) */ uint32_t mem_length; /* the length of the framebuffer (in bytes) */ uint8_t depth; /* the depth of a pixel (in bits) */ /* * Framebuffer page directory * * Each directory page holds PAGE_SIZE / sizeof(*pd) * framebuffer pages, and can thus map up to PAGE_SIZE * * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs * 64 bit. 256 directories give enough room for a 512 Meg * framebuffer with a max resolution of 12,800x10,240. Should * be enough for a while with room leftover for expansion. */ unsigned long pd[256]; }; /* * Wart: xenkbd needs to know default resolution. 
Put it here until a * better solution is found, but don't leak it to the backend. */ #ifdef __KERNEL__ #define XENFB_WIDTH 800 #define XENFB_HEIGHT 600 #define XENFB_DEPTH 32 #endif #endif /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/kbdif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/kbdif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/kbdif.h (revision 288926) @@ -1,132 +1,132 @@ /* * kbdif.h -- Xen virtual keyboard/mouse * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (C) 2005 Anthony Liguori * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster */ #ifndef __XEN_PUBLIC_IO_KBDIF_H__ #define __XEN_PUBLIC_IO_KBDIF_H__ /* In events (backend -> frontend) */ /* * Frontends should ignore unknown in events. 
*/ /* Pointer movement event */ #define XENKBD_TYPE_MOTION 1 /* Event type 2 currently not used */ /* Key event (includes pointer buttons) */ #define XENKBD_TYPE_KEY 3 /* * Pointer position event * Capable backend sets feature-abs-pointer in xenstore. * Frontend requests ot instead of XENKBD_TYPE_MOTION by setting * request-abs-update in xenstore. */ #define XENKBD_TYPE_POS 4 struct xenkbd_motion { uint8_t type; /* XENKBD_TYPE_MOTION */ int32_t rel_x; /* relative X motion */ int32_t rel_y; /* relative Y motion */ int32_t rel_z; /* relative Z motion (wheel) */ }; struct xenkbd_key { uint8_t type; /* XENKBD_TYPE_KEY */ uint8_t pressed; /* 1 if pressed; 0 otherwise */ uint32_t keycode; /* KEY_* from linux/input.h */ }; struct xenkbd_position { uint8_t type; /* XENKBD_TYPE_POS */ int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_y; /* absolute Y position (in FB pixels) */ int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 union xenkbd_in_event { uint8_t type; struct xenkbd_motion motion; struct xenkbd_key key; struct xenkbd_position pos; char pad[XENKBD_IN_EVENT_SIZE]; }; /* Out events (frontend -> backend) */ /* * Out events may be sent only when requested by backend, and receipt * of an unknown out event is an error. * No out events currently defined. 
*/ #define XENKBD_OUT_EVENT_SIZE 40 union xenkbd_out_event { uint8_t type; char pad[XENKBD_OUT_EVENT_SIZE]; }; /* shared page */ #define XENKBD_IN_RING_SIZE 2048 #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) #define XENKBD_IN_RING_OFFS 1024 #define XENKBD_IN_RING(page) \ ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) #define XENKBD_IN_RING_REF(page, idx) \ (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) #define XENKBD_OUT_RING_SIZE 1024 #define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) #define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) #define XENKBD_OUT_RING(page) \ ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) #define XENKBD_OUT_RING_REF(page, idx) \ (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) struct xenkbd_page { uint32_t in_cons, in_prod; uint32_t out_cons, out_prod; }; #endif /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/libxenvchan.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/libxenvchan.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/libxenvchan.h (revision 288926) @@ -1,97 +1,96 @@ /** * @file * @section AUTHORS * * Copyright (C) 2010 Rafal Wojtczuk * * Authors: * Rafal Wojtczuk * Daniel De Graaf * * @section LICENSE * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * License along with this library; If not, see <http://www.gnu.org/licenses/>. * * @section DESCRIPTION * * Originally borrowed from the Qubes OS Project, http://www.qubes-os.org, * this code has been substantially rewritten to use the gntdev and gntalloc * devices instead of raw MFNs and map_foreign_range. * * This is a library for inter-domain communication. A standard Xen ring * buffer is used, with a datagram-based interface built on top. The grant * reference and event channels are shared in XenStore under a user-specified * path. * * The ring.h macros define an asymmetric interface to a shared data structure * that assumes all rings reside in a single contiguous memory space. This is * not suitable for vchan because the interface to the ring is symmetric except * for the setup. Unlike the producer-consumer rings defined in ring.h, the * size of the rings used in vchan are determined at execution time instead of * compile time, so the macros in ring.h cannot be used to access the rings. */ #include #include struct ring_shared { uint32_t cons, prod; }; #define VCHAN_NOTIFY_WRITE 0x1 #define VCHAN_NOTIFY_READ 0x2 /** * vchan_interface: primary shared data structure */ struct vchan_interface { /** * Standard consumer/producer interface, one pair per buffer * left is client write, server read * right is client read, server write */ struct ring_shared left, right; /** * size of the rings, which determines their location * 10 - at offset 1024 in ring's page * 11 - at offset 2048 in ring's page * 12+ - uses 2^(N-12) grants to describe the multi-page ring * These should remain constant once the page is shared. * Only one of the two orders can be 10 (or 11).
*/ uint16_t left_order, right_order; /** * Shutdown detection: * 0: client (or server) has exited * 1: client (or server) is connected * 2: client has not yet connected */ uint8_t cli_live, srv_live; /** * Notification bits: * VCHAN_NOTIFY_WRITE: send notify when data is written * VCHAN_NOTIFY_READ: send notify when data is read (consumed) * cli_notify is used for the client to inform the server of its action */ uint8_t cli_notify, srv_notify; /** * Grant list: ordering is left, right. Must not extend into actual ring * or grow beyond the end of the initial shared page. * These should remain constant once the page is shared, to allow * for possible remapping by a client that restarts. */ uint32_t grants[0]; }; Index: projects/clang370-import/sys/xen/interface/io/netif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/netif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/netif.h (revision 288926) @@ -1,211 +1,429 @@ /****************************************************************************** * netif.h * * Unified network-device I/O interface for Xen guest OSes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2003-2004, Keir Fraser */ #ifndef __XEN_PUBLIC_IO_NETIF_H__ #define __XEN_PUBLIC_IO_NETIF_H__ #include "ring.h" #include "../grant_table.h" /* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* * Notifications after enqueuing any type of message should be conditional on * the appropriate req_event or rsp_event field in the shared ring. * If the client sends notification for rx requests then it should specify * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume * that it cannot safely queue packets (as it may not be kicked to send them). */ /* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). 
+ * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels. + * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue for which those keys belong. Queues + * are indexed from zero. 
For example, a frontend with two queues and split + * event channels must write the following set of queue-related keys: + * + * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vif/0/queue-0 = "" + * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "" + * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "" + * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "" + * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "" + * /local/domain/1/device/vif/0/queue-1 = "" + * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "" + * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = " */ #ifndef __XEN_PCI_COMMON_H__ #define __XEN_PCI_COMMON_H__ /* Be sure to bump this number if you change this file */ #define XEN_PCI_MAGIC "7" /* xen_pci_sharedinfo flags */ #define _XEN_PCIF_active (0) #define XEN_PCIF_active (1<<_XEN_PCIF_active) #define _XEN_PCIB_AERHANDLER (1) #define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) #define _XEN_PCIB_active (2) #define XEN_PCIB_active (1<<_XEN_PCIB_active) /* xen_pci_op commands */ #define XEN_PCI_OP_conf_read (0) #define XEN_PCI_OP_conf_write (1) #define XEN_PCI_OP_enable_msi (2) #define XEN_PCI_OP_disable_msi (3) #define XEN_PCI_OP_enable_msix (4) #define XEN_PCI_OP_disable_msix (5) #define XEN_PCI_OP_aer_detected (6) #define XEN_PCI_OP_aer_resume (7) #define XEN_PCI_OP_aer_mmio (8) #define XEN_PCI_OP_aer_slotreset (9) +#define XEN_PCI_OP_enable_multi_msi (10) /* xen_pci_op error numbers */ #define XEN_PCI_ERR_success (0) #define XEN_PCI_ERR_dev_not_found (-1) #define XEN_PCI_ERR_invalid_offset (-2) #define XEN_PCI_ERR_access_denied (-3) #define XEN_PCI_ERR_not_implemented (-4) /* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ #define XEN_PCI_ERR_op_failed (-5) /* * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) * Should not exceed 128 */ #define SH_INFO_MAX_VEC 128 struct xen_msix_entry { uint16_t vector; uint16_t entry; }; struct 
xen_pci_op { /* IN: what action to perform: XEN_PCI_OP_* */ uint32_t cmd; /* OUT: will contain an error number (if any) from errno.h */ int32_t err; /* IN: which device to touch */ uint32_t domain; /* PCI Domain/Segment */ uint32_t bus; uint32_t devfn; /* IN: which configuration registers to touch */ int32_t offset; int32_t size; /* IN/OUT: Contains the result after a READ or the value to WRITE */ uint32_t value; /* IN: Contains extra infor for this operation */ uint32_t info; /*IN: param for msi-x */ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; }; /*used for pcie aer handling*/ struct xen_pcie_aer_op { /* IN: what action to perform: XEN_PCI_OP_* */ uint32_t cmd; /*IN/OUT: return aer_op result or carry error_detected state as input*/ int32_t err; /* IN: which device to touch */ uint32_t domain; /* PCI Domain/Segment*/ uint32_t bus; uint32_t devfn; }; struct xen_pci_sharedinfo { /* flags - XEN_PCIF_* */ uint32_t flags; struct xen_pci_op op; struct xen_pcie_aer_op aer_op; }; #endif /* __XEN_PCI_COMMON_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/protocols.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/protocols.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/protocols.h (revision 288926) @@ -1,43 +1,42 @@ /****************************************************************************** * protocols.h * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, 
subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2008, Keir Fraser */ #ifndef __XEN_PROTOCOLS_H__ #define __XEN_PROTOCOLS_H__ #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" -#define XEN_IO_PROTO_ABI_IA64 "ia64-abi" #define XEN_IO_PROTO_ABI_ARM "arm-abi" #if defined(__i386__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 #elif defined(__x86_64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 -#elif defined(__ia64__) -# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 -#elif defined(__arm__) +#elif defined(__arm__) || defined(__aarch64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM #else # error arch fixup needed here #endif #endif Index: projects/clang370-import/sys/xen/interface/io/ring.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/ring.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/ring.h (revision 288926) @@ -1,339 +1,312 @@ /****************************************************************************** * ring.h * * Shared producer-consumer ring macros. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Tim Deegan and Andrew Warfield November 2004. */ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ #include "../xen-compat.h" #if __XEN_INTERFACE_VERSION__ < 0x00030208 #define xen_mb() mb() #define xen_rmb() rmb() #define xen_wmb() wmb() #endif typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ #define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) #define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) #define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) #define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) /* - * The amount of space reserved in the shared ring for accounting information. 
- */ -#define __RING_HEADER_SIZE(_s) \ - ((intptr_t)(_s)->ring - (intptr_t)(_s)) - -/* * Calculate size of a shared ring, given the total available space for the * ring and indexes (_sz), and the name tag of the request/response structure. - * A ring contains as many entries as will fit, rounded down to the nearest + * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ #define __CONST_RING_SIZE(_s, _sz) \ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ sizeof(((struct _s##_sring *)0)->ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. */ #define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - __RING_HEADER_SIZE(_s)) / sizeof((_s)->ring[0]))) + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* - * The number of pages needed to support a given number of request/reponse - * entries. The entry count is rounded down to the nearest power of two - * as required by the ring macros. - */ -#define __RING_PAGES(_s, _entries) \ - ((__RING_HEADER_SIZE(_s) \ - + (__RD32(_entries) * sizeof((_s)->ring[0])) \ - + PAGE_SIZE - 1) / PAGE_SIZE) - -/* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, * let's say request_t, and response_t already defined. * * In a header where you want the ring datatype declared, you then do: * * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * * mytag_sring_t - The shared ring. * mytag_front_ring_t - The 'front' half of the ring. * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). 
To initialise * the front half: * * mytag_front_ring_t front_ring; * SHARED_RING_INIT((mytag_sring_t *)shared_page); * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * * mytag_back_ring_t back_ring; * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ #define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ \ /* Shared ring entry */ \ union __name##_sring_entry { \ __req_t req; \ __rsp_t rsp; \ }; \ \ /* Shared ring page */ \ struct __name##_sring { \ RING_IDX req_prod, req_event; \ RING_IDX rsp_prod, rsp_event; \ union { \ struct { \ uint8_t smartpoll_active; \ } netif; \ struct { \ uint8_t msg; \ } tapif_user; \ uint8_t pvt_pad[4]; \ - } private; \ + } pvt; \ uint8_t __pad[44]; \ union __name##_sring_entry ring[1]; /* variable-length */ \ }; \ \ /* "Front" end's private variables */ \ struct __name##_front_ring { \ RING_IDX req_prod_pvt; \ RING_IDX rsp_cons; \ unsigned int nr_ents; \ struct __name##_sring *sring; \ }; \ \ /* "Back" end's private variables */ \ struct __name##_back_ring { \ RING_IDX rsp_prod_pvt; \ RING_IDX req_cons; \ unsigned int nr_ents; \ struct __name##_sring *sring; \ }; \ \ /* Syntactic sugar */ \ typedef struct __name##_sring __name##_sring_t; \ typedef struct __name##_front_ring __name##_front_ring_t; \ typedef struct __name##_back_ring __name##_back_ring_t /* * Macros for manipulating rings. * * FRONT_RING_whatever works on the "front end" of a ring: here * requests are pushed on to the ring and responses taken off it. * * BACK_RING_whatever works on the "back end" of a ring: here * requests are taken off the ring and responses put on. * * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. * This is OK in 1-for-1 request-response situations where the * requestor (front end) never has more than RING_SIZE()-1 * outstanding requests. 
*/ /* Initialising empty rings */ #define SHARED_RING_INIT(_s) do { \ (_s)->req_prod = (_s)->rsp_prod = 0; \ (_s)->req_event = (_s)->rsp_event = 1; \ - (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); \ (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) #define FRONT_RING_INIT(_r, _s, __size) do { \ (_r)->req_prod_pvt = 0; \ (_r)->rsp_cons = 0; \ (_r)->nr_ents = __RING_SIZE(_s, __size); \ (_r)->sring = (_s); \ } while (0) #define BACK_RING_INIT(_r, _s, __size) do { \ (_r)->rsp_prod_pvt = 0; \ (_r)->req_cons = 0; \ (_r)->nr_ents = __RING_SIZE(_s, __size); \ (_r)->sring = (_s); \ } while (0) -/* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) - -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) - /* How big is this ring? */ #define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ #define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ #define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ #define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) #ifdef __GNUC__ #define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ unsigned int rsp = RING_SIZE(_r) - \ ((_r)->req_cons - (_r)->rsp_prod_pvt); \ req < rsp ? 
req : rsp; \ }) #else /* Same as above, but without the nice GCC ({ ... }) syntax. */ #define RING_HAS_UNCONSUMED_REQUESTS(_r) \ ((((_r)->sring->req_prod - (_r)->req_cons) < \ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ ((_r)->sring->req_prod - (_r)->req_cons) : \ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) #endif /* Direct access to individual ring elements, by index. */ #define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) #define RING_GET_RESPONSE(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) /* Loop termination condition: Would the specified index overflow the ring? */ #define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + #define RING_PUSH_REQUESTS(_r) do { \ xen_wmb(); /* back sees requests /before/ updated producer index */ \ (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) #define RING_PUSH_RESPONSES(_r) do { \ xen_wmb(); /* front sees resps /before/ updated producer index */ \ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* * Notification hold-off (req_event and rsp_event): * * When queueing requests or responses on a shared ring, it may not always be * necessary to notify the remote end. For example, if requests are in flight * in a backend, the front may be able to queue further requests without * notifying the back (if the back checks for new requests when it queues * responses). * * When enqueuing requests or responses: * * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument * is a boolean return value. True indicates that the receiver requires an * asynchronous notification. 
* * After dequeuing requests or responses (before sleeping the connection): * * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). * The second argument is a boolean return value. True indicates that there * are pending messages on the ring (i.e., the connection should not be put * to sleep). * * These macros will set the req_event/rsp_event field to trigger a * notification on the very next message that is enqueued. If you want to * create batches of work (i.e., only receive a notification after several * messages have been enqueued) then you will need to create a customised * version of the FINAL_CHECK macro in your own code, which sets the event * field appropriately. */ #define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ RING_IDX __old = (_r)->sring->req_prod; \ RING_IDX __new = (_r)->req_prod_pvt; \ xen_wmb(); /* back sees requests /before/ updated producer index */ \ (_r)->sring->req_prod = __new; \ xen_mb(); /* back sees new requests /before/ we check req_event */ \ (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ (RING_IDX)(__new - __old)); \ } while (0) #define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ RING_IDX __old = (_r)->sring->rsp_prod; \ RING_IDX __new = (_r)->rsp_prod_pvt; \ xen_wmb(); /* front sees resps /before/ updated producer index */ \ (_r)->sring->rsp_prod = __new; \ xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ (RING_IDX)(__new - __old)); \ } while (0) #define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ if (_work_to_do) break; \ (_r)->sring->req_event = (_r)->req_cons + 1; \ xen_mb(); \ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) #define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ if (_work_to_do) break; \ (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ 
xen_mb(); \ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) #endif /* __XEN_PUBLIC_IO_RING_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/tpmif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/tpmif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/tpmif.h (revision 288926) @@ -1,77 +1,143 @@ /****************************************************************************** * tpmif.h * * TPM I/O interface for Xen guest OSes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
 * * Copyright (c) 2005, IBM Corporation * * Author: Stefan Berger, stefanb@us.ibm.com * Grant table support: Mahadevan Gomathisankaran * * This code has been derived from tools/libxc/xen/io/netif.h * * Copyright (c) 2003-2004, Keir Fraser */ #ifndef __XEN_PUBLIC_IO_TPMIF_H__ #define __XEN_PUBLIC_IO_TPMIF_H__ #include "../grant_table.h" struct tpmif_tx_request { unsigned long addr; /* Machine address of packet. */ grant_ref_t ref; /* grant table access reference */ uint16_t unused; uint16_t size; /* Packet size in bytes. */ }; typedef struct tpmif_tx_request tpmif_tx_request_t; /* * The TPMIF_TX_RING_SIZE defines the number of pages the * front-end and backend can exchange (= size of array). */ typedef uint32_t TPMIF_RING_IDX; #define TPMIF_TX_RING_SIZE 1 /* This structure must fit in a memory page. */ struct tpmif_ring { struct tpmif_tx_request req; }; typedef struct tpmif_ring tpmif_ring_t; struct tpmif_tx_interface { struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; }; typedef struct tpmif_tx_interface tpmif_tx_interface_t; +/****************************************************************************** + * TPM I/O interface for Xen guest OSes, v2 + * + * Author: Daniel De Graaf + * + * This protocol emulates the request/response behavior of a TPM using a Xen + * shared memory interface. All interaction with the TPM is at the direction + * of the frontend, since a TPM (hardware or virtual) is a passive device - + * the backend only processes commands as requested by the frontend. + * + * The frontend sends a request to the TPM by populating the shared page with + * the request packet, changing the state to TPMIF_STATE_SUBMIT, and sending + * an event channel notification. When the backend is finished, it will set + * the state to TPMIF_STATE_FINISH and send an event channel notification. + * + * In order to allow long-running commands to be canceled, the frontend can + * at any time change the state to TPMIF_STATE_CANCEL and send a notification. 
+ * The TPM can either finish the command (changing state to TPMIF_STATE_FINISH) + * or can cancel the command and change the state to TPMIF_STATE_IDLE. The TPM + * can also change the state to TPMIF_STATE_IDLE instead of TPMIF_STATE_FINISH + * if another reason for cancellation is required - for example, a physical + * TPM may cancel a command if the interface is seized by another locality. + * + * The TPM command format is defined by the TCG, and is available at + * http://www.trustedcomputinggroup.org/resources/tpm_main_specification + */ + +enum tpmif_state { + TPMIF_STATE_IDLE, /* no contents / vTPM idle / cancel complete */ + TPMIF_STATE_SUBMIT, /* request ready / vTPM working */ + TPMIF_STATE_FINISH, /* response ready / vTPM idle */ + TPMIF_STATE_CANCEL, /* cancel requested / vTPM working */ +}; +/* Note: The backend should only change state to IDLE or FINISH, while the + * frontend should only change to SUBMIT or CANCEL. Status changes do not need + * to use atomic operations. + */ + + +/* The shared page for vTPM request/response packets looks like: + * + * Offset Contents + * ================================================= + * 0 struct tpmif_shared_page + * 16 [optional] List of grant IDs + * 16+4*nr_extra_pages TPM packet data + * + * If the TPM packet data extends beyond the end of a single page, the grant IDs + * defined in extra_pages are used as if they were mapped immediately following + * the primary shared page. The grants are allocated by the frontend and mapped + * by the backend. Before sending a request spanning multiple pages, the + * frontend should verify that the TPM supports such large requests by querying + * the TPM_CAP_PROP_INPUT_BUFFER property from the TPM. 
+ */ +struct tpmif_shared_page { + uint32_t length; /* request/response length in bytes */ + + uint8_t state; /* enum tpmif_state */ + uint8_t locality; /* for the current request */ + uint8_t pad; /* should be zero */ + + uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ + uint32_t extra_pages[0]; /* grant IDs; length is actually nr_extra_pages */ +}; +typedef struct tpmif_shared_page tpmif_shared_page_t; + #endif /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/usbif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/usbif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/usbif.h (revision 288926) @@ -1,151 +1,252 @@ /* * usbif.h * * USB I/O interface for Xen guest OSes. * * Copyright (C) 2009, FUJITSU LABORATORIES LTD. * Author: Noboru Iwamatsu * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_IO_USBIF_H__ #define __XEN_PUBLIC_IO_USBIF_H__ #include "ring.h" #include "../grant_table.h" +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * num-ports + * Values: unsigned [1...31] + * + * Number of ports for this (virtual) USB host connector. + * + * usb-ver + * Values: unsigned [1...2] + * + * USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0. + * + * port/[1...31] + * Values: string + * + * Physical USB device connected to the given port, e.g. "3-1.5". 
+ * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * urb-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for urb requests. + * + * conn-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for connection/disconnection requests. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + */ + enum usb_spec_version { USB_VER_UNKNOWN = 0, USB_VER_USB11, USB_VER_USB20, USB_VER_USB30, /* not supported yet */ }; /* * USB pipe in usbif_request * - * bits 0-5 are specific bits for virtual USB driver. - * bits 7-31 are standard urb pipe. 
+ * - port number: bits 0-4 + * (USB_MAXCHILDREN is 31) * - * - port number(NEW): bits 0-4 - * (USB_MAXCHILDREN is 31) + * - operation flag: bit 5 + * (0 = submit urb, + * 1 = unlink urb) * - * - operation flag(NEW): bit 5 - * (0 = submit urb, - * 1 = unlink urb) - * * - direction: bit 7 - * (0 = Host-to-Device [Out] - * 1 = Device-to-Host [In]) + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) * * - device address: bits 8-14 * * - endpoint: bits 15-18 * - * - pipe type: bits 30-31 - * (00 = isochronous, 01 = interrupt, - * 10 = control, 11 = bulk) + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) */ -#define usbif_pipeportnum(pipe) ((pipe) & 0x1f) -#define usbif_setportnum_pipe(pipe, portnum) \ - ((pipe)|(portnum)) -#define usbif_pipeunlink(pipe) ((pipe) & 0x20) -#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) -#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20)) +#define USBIF_PIPE_PORT_MASK 0x0000001f +#define USBIF_PIPE_UNLINK 0x00000020 +#define USBIF_PIPE_DIR 0x00000080 +#define USBIF_PIPE_DEV_MASK 0x0000007f +#define USBIF_PIPE_DEV_SHIFT 8 +#define USBIF_PIPE_EP_MASK 0x0000000f +#define USBIF_PIPE_EP_SHIFT 15 +#define USBIF_PIPE_TYPE_MASK 0x00000003 +#define USBIF_PIPE_TYPE_SHIFT 30 +#define USBIF_PIPE_TYPE_ISOC 0 +#define USBIF_PIPE_TYPE_INT 1 +#define USBIF_PIPE_TYPE_CTRL 2 +#define USBIF_PIPE_TYPE_BULK 3 -#define USBIF_BACK_MAX_PENDING_REQS (128) +#define usbif_pipeportnum(pipe) ((pipe) & USBIF_PIPE_PORT_MASK) +#define usbif_setportnum_pipe(pipe, portnum) ((pipe) | (portnum)) + +#define usbif_pipeunlink(pipe) ((pipe) & USBIF_PIPE_UNLINK) +#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) +#define usbif_setunlink_pipe(pipe) ((pipe) | USBIF_PIPE_UNLINK) + +#define usbif_pipein(pipe) ((pipe) & USBIF_PIPE_DIR) +#define usbif_pipeout(pipe) (!usbif_pipein(pipe)) + +#define usbif_pipedevice(pipe) \ + (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK) + +#define usbif_pipeendpoint(pipe) \ 
+ (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK) + +#define usbif_pipetype(pipe) \ + (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK) +#define usbif_pipeisoc(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC) +#define usbif_pipeint(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT) +#define usbif_pipectrl(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL) +#define usbif_pipebulk(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK) + #define USBIF_MAX_SEGMENTS_PER_REQUEST (16) +#define USBIF_MAX_PORTNR 31 /* * RING for transferring urbs. */ struct usbif_request_segment { grant_ref_t gref; uint16_t offset; uint16_t length; }; struct usbif_urb_request { uint16_t id; /* request id */ uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ /* basic urb parameter */ uint32_t pipe; uint16_t transfer_flags; uint16_t buffer_length; union { uint8_t ctrl[8]; /* setup_packet (Ctrl) */ struct { uint16_t interval; /* maximum (1024*8) in usb core */ uint16_t start_frame; /* start frame */ uint16_t number_of_packets; /* number of ISO packet */ uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ } isoc; struct { uint16_t interval; /* maximum (1024*8) in usb core */ uint16_t pad[3]; } intr; struct { uint16_t unlink_id; /* unlink request id */ uint16_t pad[3]; } unlink; } u; /* urb data segments */ struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; }; typedef struct usbif_urb_request usbif_urb_request_t; struct usbif_urb_response { uint16_t id; /* request id */ uint16_t start_frame; /* start frame (ISO) */ int32_t status; /* status (non-ISO) */ int32_t actual_length; /* actual transfer length */ int32_t error_count; /* number of ISO errors */ }; typedef struct usbif_urb_response usbif_urb_response_t; DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); #define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE) /* * RING for notifying connect/disconnect events to frontend */ 
struct usbif_conn_request { uint16_t id; }; typedef struct usbif_conn_request usbif_conn_request_t; struct usbif_conn_response { uint16_t id; /* request id */ uint8_t portnum; /* port number */ uint8_t speed; /* usb_device_speed */ +#define USBIF_SPEED_NONE 0 +#define USBIF_SPEED_LOW 1 +#define USBIF_SPEED_FULL 2 +#define USBIF_SPEED_HIGH 3 }; typedef struct usbif_conn_response usbif_conn_response_t; DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); #define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE) #endif /* __XEN_PUBLIC_IO_USBIF_H__ */ Index: projects/clang370-import/sys/xen/interface/io/vscsiif.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/vscsiif.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/vscsiif.h (revision 288926) @@ -1,105 +1,260 @@ /****************************************************************************** * vscsiif.h - * + * * Based on the blkif.h code. - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright(c) FUJITSU Limited 2008. */ #ifndef __XEN__PUBLIC_IO_SCSI_H__ #define __XEN__PUBLIC_IO_SCSI_H__ #include "ring.h" #include "../grant_table.h" -/* command between backend and frontend */ -#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */ -#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/ -#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/ +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * p-devname + * Values: string + * + * A free string used to identify the physical device (e.g. a disk name). + * + * p-dev + * Values: string + * + * A string specifying the backend device: either a 4-tuple "h:c:t:l" + * (host, controller, target, lun, all integers), or a WWN (e.g. + * "naa.60014054ac780582"). 
+ * + * v-dev + * Values: string + * + * A string specifying the frontend device in form of a 4-tuple "h:c:t:l" + * (host, controller, target, lun, all integers). + * + *--------------------------------- Features --------------------------------- + * + * feature-sg-grant + * Values: unsigned [VSCSIIF_SG_TABLESIZE...65535] + * Default Value: 0 + * + * Specifies the maximum number of scatter/gather elements in grant pages + * supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE + * SG elements specified directly in the request. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + */ +/* Requests from the frontend to the backend */ -#define VSCSIIF_BACK_MAX_PENDING_REQS 128 +/* + * Request a SCSI operation specified via a CDB in vscsiif_request.cmnd. + * The target is specified via channel, id and lun. + * + * The operation to be performed is specified via a CDB in cmnd[], the length + * of the CDB is in cmd_len. sc_data_direction specifies the direction of data + * (to the device, from the device, or none at all). 
+ * + * If data is to be transferred to or from the device the buffer(s) in the + * guest memory is/are specified via one or multiple scsiif_request_segment + * descriptors each specifying a memory page via a grant_ref_t, an offset into + * the page and the length of the area in that page. All scsiif_request_segment + * areas concatenated form the resulting data buffer used by the operation. + * If the number of scsiif_request_segment areas is not too large (less than + * or equal to VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the + * seg[] array and the number of valid scsiif_request_segment elements is to be + * set in nr_segments. + * + * If "feature-sg-grant" in the Xenstore is set it is possible to specify more + * than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection. + * The maximum number of allowed scsiif_request_segment elements is the value + * of the "feature-sg-grant" entry from Xenstore. When using indirection the + * seg[] array doesn't contain specifications of the data buffers, but + * references to scsiif_request_segment arrays, which in turn reference the + * data buffers. While nr_segments holds the number of populated seg[] entries + * (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment + * elements referencing the target data buffers is calculated from the lengths + * of the seg[] elements (the sum of all valid seg[].length divided by the + * size of one scsiif_request_segment structure). The frontend may use a mix of + * direct and indirect requests. + */ +#define VSCSIIF_ACT_SCSI_CDB 1 /* + * Request abort of a running operation for the specified target given by + * channel, id, lun and the operation's rqid in ref_rqid. + */ +#define VSCSIIF_ACT_SCSI_ABORT 2 + +/* + * Request a device reset of the specified target (channel and id). + */ +#define VSCSIIF_ACT_SCSI_RESET 3 + +/* + * Preset scatter/gather elements for a following request. Deprecated. 
+ * Keeping the define only to avoid usage of the value "4" for other actions. + */ +#define VSCSIIF_ACT_SCSI_SG_PRESET 4 + +/* * Maximum scatter/gather segments per request. * - * Considering balance between allocating al least 16 "vscsiif_request" - * structures on one page (4096bytes) and number of scatter gather - * needed, we decided to use 26 as a magic number. + * Considering balance between allocating at least 16 "vscsiif_request" + * structures on one page (4096 bytes) and the number of scatter/gather + * elements needed, we decided to use 26 as a magic number. + * + * If "feature-sg-grant" is set, more scatter/gather elements can be specified + * by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages. + * In this case the vscsiif_request seg elements don't contain references to + * the user data, but to the SG elements referencing the user data. */ #define VSCSIIF_SG_TABLESIZE 26 /* - * base on linux kernel 2.6.18 + * based on Linux kernel 2.6.18, still valid + * + * Changing these values requires support of multiple protocols via the rings + * as "old clients" will blindly use these values and the resulting structure + * sizes. */ #define VSCSIIF_MAX_COMMAND_SIZE 16 #define VSCSIIF_SENSE_BUFFERSIZE 96 +struct scsiif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; +typedef struct scsiif_request_segment vscsiif_segment_t; +#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment)) + +/* Size of one request is 252 bytes */ struct vscsiif_request { uint16_t rqid; /* private guest value, echoed in resp */ uint8_t act; /* command between backend and frontend */ - uint8_t cmd_len; + uint8_t cmd_len; /* valid CDB bytes */ - uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; - uint16_t timeout_per_command; /* The command is issued by twice - the value in Backend. 
*/ - uint16_t channel, id, lun; - uint16_t padding; - uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) - DMA_FROM_DEVICE(2) - DMA_NONE(3) requests */ - uint8_t nr_segments; /* Number of pieces of scatter-gather */ + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */ + uint16_t timeout_per_command; /* deprecated: timeout in secs, 0=default */ + uint16_t channel, id, lun; /* (virtual) device specification */ + uint16_t ref_rqid; /* command abort reference */ + uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) + DMA_FROM_DEVICE(2) + DMA_NONE(3) requests */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ +/* + * flag in nr_segments: SG elements via grant page + * + * If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number + * of grant pages containing SG elements. Usable if "feature-sg-grant" set. + */ +#define VSCSIIF_SG_GRANT 0x80 - struct scsiif_request_segment { - grant_ref_t gref; - uint16_t offset; - uint16_t length; - } seg[VSCSIIF_SG_TABLESIZE]; + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE]; uint32_t reserved[3]; }; typedef struct vscsiif_request vscsiif_request_t; +/* + * The following interface is deprecated! + */ +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \ + / sizeof(vscsiif_segment_t)) + +struct vscsiif_sg_list { + /* First two fields must match struct vscsiif_request! 
*/ + uint16_t rqid; /* private guest value, must match main req */ + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE]; +}; +typedef struct vscsiif_sg_list vscsiif_sg_list_t; +/* End of deprecated interface */ + +/* Size of one response is 252 bytes */ struct vscsiif_response { - uint16_t rqid; - uint8_t padding; + uint16_t rqid; /* identifies request */ + uint8_t act; /* deprecated: valid only if SG_PRESET supported */ uint8_t sense_len; uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; int32_t rslt; uint32_t residual_len; /* request bufflen - return the value from physical device */ uint32_t reserved[36]; }; typedef struct vscsiif_response vscsiif_response_t; DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response); #endif /*__XEN__PUBLIC_IO_SCSI_H__*/ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/xenbus.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/xenbus.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/xenbus.h (revision 288926) @@ -1,98 +1,80 @@ /***************************************************************************** * xenbus.h * * Xenbus protocol details. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (C) 2005 XenSource Ltd. */ #ifndef _XEN_PUBLIC_IO_XENBUS_H #define _XEN_PUBLIC_IO_XENBUS_H /* * The state of either end of the Xenbus, i.e. the current communication * status of initialisation across the bus. States here imply nothing about * the state of the connection between the driver and the kernel's device * layers. */ enum xenbus_state { XenbusStateUnknown = 0, - /* - * Initializing: Back-end is initializing. - */ XenbusStateInitialising = 1, /* * InitWait: Finished early initialisation but waiting for information * from the peer or hotplug scripts. */ XenbusStateInitWait = 2, /* * Initialised: Waiting for a connection from the peer. */ XenbusStateInitialised = 3, - /* - * Connected: The normal state for a front to backend connection. - */ XenbusStateConnected = 4, /* * Closing: The device is being closed due to an error or an unplug event. 
*/ XenbusStateClosing = 5, - /* - * Closed: No connection exists between front and back end. - * - * For backend devices with the "online" attribute, the front can - * request a reconnect at any time. To handle this transition - * gracefully, backend devices must reinitialize any XenStore data - * used to negotiate features with a peer before transitioning to - * the closed state. When a reconnect request occurs, the - * XenBus backend support code will automatically transition the - * backend device from Closed to InitWait, kicking off the ring - * and feature negotiation process. - */ XenbusStateClosed = 6, /* * Reconfiguring: The device is being reconfigured. */ XenbusStateReconfiguring = 7, XenbusStateReconfigured = 8 }; typedef enum xenbus_state XenbusState; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/io/xs_wire.h =================================================================== --- projects/clang370-import/sys/xen/interface/io/xs_wire.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/io/xs_wire.h (revision 288926) @@ -1,134 +1,149 @@ /* * Details of the "wire" protocol between Xen Store Daemon and client * library or guest kernel. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (C) 2005 Rusty Russell IBM Corporation */ #ifndef _XS_WIRE_H #define _XS_WIRE_H enum xsd_sockmsg_type { XS_DEBUG, XS_DIRECTORY, XS_READ, XS_GET_PERMS, XS_WATCH, XS_UNWATCH, XS_TRANSACTION_START, XS_TRANSACTION_END, XS_INTRODUCE, XS_RELEASE, XS_GET_DOMAIN_PATH, XS_WRITE, XS_MKDIR, XS_RM, XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, XS_IS_DOMAIN_INTRODUCED, XS_RESUME, XS_SET_TARGET, XS_RESTRICT, - XS_RESET_WATCHES + XS_RESET_WATCHES, + + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ }; #define XS_WRITE_NONE "NONE" #define XS_WRITE_CREATE "CREATE" #define XS_WRITE_CREATE_EXCL "CREATE|EXCL" /* We hand errors as strings, for portability. */ struct xsd_errors { int errnum; const char *errstring; }; #ifdef EINVAL #define XSD_ERROR(x) { x, #x } /* LINTED: static unused */ static struct xsd_errors xsd_errors[] #if defined(__GNUC__) __attribute__((unused)) #endif = { XSD_ERROR(EINVAL), XSD_ERROR(EACCES), XSD_ERROR(EEXIST), XSD_ERROR(EISDIR), XSD_ERROR(ENOENT), XSD_ERROR(ENOMEM), XSD_ERROR(ENOSPC), XSD_ERROR(EIO), XSD_ERROR(ENOTEMPTY), XSD_ERROR(ENOSYS), XSD_ERROR(EROFS), XSD_ERROR(EBUSY), XSD_ERROR(EAGAIN), - XSD_ERROR(EISCONN) + XSD_ERROR(EISCONN), + XSD_ERROR(E2BIG) }; #endif struct xsd_sockmsg { uint32_t type; /* XS_??? */ uint32_t req_id;/* Request identifier, echoed in daemon's response. */ uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ uint32_t len; /* Length of data following this. */ /* Generally followed by nul-terminated string(s). 
*/ }; enum xs_watch_type { XS_WATCH_PATH = 0, XS_WATCH_TOKEN }; -/* Inter-domain shared memory communications. */ +/* + * `incontents 150 xenstore_struct XenStore wire protocol. + * + * Inter-domain shared memory communications. */ #define XENSTORE_RING_SIZE 1024 typedef uint32_t XENSTORE_RING_IDX; #define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) struct xenstore_domain_interface { char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ XENSTORE_RING_IDX req_cons, req_prod; XENSTORE_RING_IDX rsp_cons, rsp_prod; + uint32_t server_features; /* Bitmap of features supported by the server */ + uint32_t connection; }; /* Violating this is very bad. See docs/misc/xenstore.txt. */ #define XENSTORE_PAYLOAD_MAX 4096 /* Violating these just gets you an error back */ #define XENSTORE_ABS_PATH_MAX 3072 #define XENSTORE_REL_PATH_MAX 2048 +/* The ability to reconnect a ring */ +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1 + +/* Valid values for the connection field */ +#define XENSTORE_CONNECTED 0 /* the steady-state */ +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */ + #endif /* _XS_WIRE_H */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/kexec.h =================================================================== --- projects/clang370-import/sys/xen/interface/kexec.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/kexec.h (revision 288926) @@ -1,168 +1,249 @@ /****************************************************************************** * kexec.h - Public portion * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to 
use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Xen port written by: * - Simon 'Horms' Horman * - Magnus Damm */ #ifndef _XEN_PUBLIC_KEXEC_H #define _XEN_PUBLIC_KEXEC_H /* This file describes the Kexec / Kdump hypercall interface for Xen. * * Kexec under vanilla Linux allows a user to reboot the physical machine * into a new user-specified kernel. The Xen port extends this idea * to allow rebooting of the machine from dom0. When kexec for dom0 * is used to reboot, both the hypervisor and the domains get replaced * with some other kernel. It is possible to kexec between vanilla * Linux and Xen and back again. Xen to Xen works well too. * * The hypercall interface for kexec can be divided into three main * types of hypercall operations: * * 1) Range information: * This is used by the dom0 kernel to ask the hypervisor about various * address information. This information is needed to allow kexec-tools * to fill in the ELF headers for /proc/vmcore properly. * * 2) Load and unload of images: * There are no big surprises here, the kexec binary from kexec-tools * runs in userspace in dom0. The tool loads/unloads data into the * dom0 kernel such as new kernel, initramfs and hypervisor. 
When * loaded the dom0 kernel performs a load hypercall operation, and * before releasing all page references the dom0 kernel calls unload. * * 3) Kexec operation: * This is used to start a previously loaded kernel. */ #include "xen.h" #if defined(__i386__) || defined(__x86_64__) #define KEXEC_XEN_NO_PAGES 17 #endif /* * Prototype for this hypercall is: * int kexec_op(int cmd, void *args) * @cmd == KEXEC_CMD_... * KEXEC operation to perform * @args == Operation-specific extra arguments (NULL if none). */ /* * Kexec supports two types of operation: * - kexec into a regular kernel, very similar to a standard reboot * - KEXEC_TYPE_DEFAULT is used to specify this type * - kexec into a special "crash kernel", aka kexec-on-panic * - KEXEC_TYPE_CRASH is used to specify this type * - parts of our system may be broken at kexec-on-panic time * - the code should be kept as simple and self-contained as possible */ #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 /* The kexec implementation for Xen allows the user to load two * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. * All data needed for a kexec reboot is kept in one xen_kexec_image_t * per "instance". The data mainly consists of machine address lists to pages * together with destination addresses. The data in xen_kexec_image_t * is passed to the "code page" which is one page of code that performs * the final relocations before jumping to the new kernel. */ typedef struct xen_kexec_image { #if defined(__i386__) || defined(__x86_64__) unsigned long page_list[KEXEC_XEN_NO_PAGES]; #endif -#if defined(__ia64__) - unsigned long reboot_code_buffer; -#endif unsigned long indirection_page; unsigned long start_address; } xen_kexec_image_t; /* * Perform kexec having previously loaded a kexec or kdump kernel * as appropriate. * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * + * Control is transferred to the image entry point with the host in + * the following state. 
+ * + * - The image may be executed on any PCPU and all other PCPUs are + * stopped. + * + * - Local interrupts are disabled. + * + * - Register values are undefined. + * + * - The image segments have writeable 1:1 virtual to machine + * mappings. The location of any page tables is undefined and these + * page table frames are not mapped. */ #define KEXEC_CMD_kexec 0 typedef struct xen_kexec_exec { int type; } xen_kexec_exec_t; /* * Load/Unload kernel image for kexec or kdump. * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] * image == relocation information for kexec (ignored for unload) [in] */ -#define KEXEC_CMD_kexec_load 1 -#define KEXEC_CMD_kexec_unload 2 -typedef struct xen_kexec_load { +#define KEXEC_CMD_kexec_load_v1 1 /* obsolete since 0x00040400 */ +#define KEXEC_CMD_kexec_unload_v1 2 /* obsolete since 0x00040400 */ +typedef struct xen_kexec_load_v1 { int type; xen_kexec_image_t image; -} xen_kexec_load_t; +} xen_kexec_load_v1_t; #define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ #define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ #define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ #define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap * Note that although this is adjacent * to Xen it exists in a separate EFI * region on ia64, and thus needs to be * inserted into iomem_machine separately */ -#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of +#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* Obsolete: machine address and size of * the ia64_boot_param */ #define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of * of the EFI Memory Map */ #define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */ /* * Find the address and size of certain memory areas * range == KEXEC_RANGE_...
[in] * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] * size == number of bytes reserved in window [out] * start == address of the first byte in the window [out] */ #define KEXEC_CMD_kexec_get_range 3 typedef struct xen_kexec_range { int range; int nr; unsigned long size; unsigned long start; } xen_kexec_range_t; +#if __XEN_INTERFACE_VERSION__ >= 0x00040400 +/* + * A contiguous chunk of a kexec image and its destination machine + * address. + */ +typedef struct xen_kexec_segment { + union { + XEN_GUEST_HANDLE(const_void) h; + uint64_t _pad; + } buf; + uint64_t buf_size; + uint64_t dest_maddr; + uint64_t dest_size; +} xen_kexec_segment_t; +DEFINE_XEN_GUEST_HANDLE(xen_kexec_segment_t); + +/* + * Load a kexec image into memory. + * + * For KEXEC_TYPE_DEFAULT images, the segments may be anywhere in RAM. + * The image is relocated prior to being executed. + * + * For KEXEC_TYPE_CRASH images, each segment of the image must reside + * in the memory region reserved for kexec (KEXEC_RANGE_MA_CRASH) and + * the entry point must be within the image. The caller is responsible + * for ensuring that multiple images do not overlap. + * + * All image segments will be loaded to their destination machine + * addresses prior to being executed. The trailing portion of any + * segments with a source buffer (from dest_maddr + buf_size to + * dest_maddr + dest_size) will be zeroed. + * + * Segments with no source buffer will be accessible to the image when + * it is executed. + */ + +#define KEXEC_CMD_kexec_load 4 +typedef struct xen_kexec_load { + uint8_t type; /* One of KEXEC_TYPE_* */ + uint8_t _pad; + uint16_t arch; /* ELF machine type (EM_*). */ + uint32_t nr_segments; + union { + XEN_GUEST_HANDLE(xen_kexec_segment_t) h; + uint64_t _pad; + } segments; + uint64_t entry_maddr; /* image entry point machine address. */ +} xen_kexec_load_t; +DEFINE_XEN_GUEST_HANDLE(xen_kexec_load_t); + +/* + * Unload a kexec image.
+ * + * Type must be one of KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH. + */ +#define KEXEC_CMD_kexec_unload 5 +typedef struct xen_kexec_unload { + uint8_t type; +} xen_kexec_unload_t; +DEFINE_XEN_GUEST_HANDLE(xen_kexec_unload_t); + +#else /* __XEN_INTERFACE_VERSION__ < 0x00040400 */ + +#define KEXEC_CMD_kexec_load KEXEC_CMD_kexec_load_v1 +#define KEXEC_CMD_kexec_unload KEXEC_CMD_kexec_unload_v1 +#define xen_kexec_load xen_kexec_load_v1 +#define xen_kexec_load_t xen_kexec_load_v1_t + +#endif + #endif /* _XEN_PUBLIC_KEXEC_H */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/memory.h =================================================================== --- projects/clang370-import/sys/xen/interface/memory.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/memory.h (revision 288926) @@ -1,445 +1,623 @@ /****************************************************************************** * memory.h * * Memory reservation and information. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ #include "xen.h" +#include "physdev.h" /* * Increase or decrease the specified domain's memory reservation. Returns the * number of extents successfully allocated or freed. * arg == addr of struct xen_memory_reservation. */ #define XENMEM_increase_reservation 0 #define XENMEM_decrease_reservation 1 #define XENMEM_populate_physmap 6 #if __XEN_INTERFACE_VERSION__ >= 0x00030209 /* * Maximum # bits addressable by the user of the allocated region (e.g., I/O * devices often have a 32-bit limitation even in 64-bit systems). If zero * then the user has no addressing restriction. This field is not used by * XENMEM_decrease_reservation. */ #define XENMEMF_address_bits(x) (x) #define XENMEMF_get_address_bits(x) ((x) & 0xffu) /* NUMA node to allocate from. */ #define XENMEMF_node(x) (((x) + 1) << 8) #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) /* Flag to populate physmap with populate-on-demand entries */ #define XENMEMF_populate_on_demand (1<<16) /* Flag to request allocation only from the node specified */ #define XENMEMF_exact_node_request (1<<17) #define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) +/* Flag to indicate the node specified is virtual node */ +#define XENMEMF_vnode (1<<18) #endif struct xen_memory_reservation { /* * XENMEM_increase_reservation: * OUT: MFN (*not* GMFN) bases of extents that were allocated * XENMEM_decrease_reservation: * IN: GMFN bases of extents to free * XENMEM_populate_physmap: * IN: GPFN bases of extents to populate with memory * OUT: GMFN bases of extents that were allocated * (NB. 
This command also updates the mach_to_phys translation table) + * XENMEM_claim_pages: + * IN: must be zero */ XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ xen_ulong_t nr_extents; unsigned int extent_order; #if __XEN_INTERFACE_VERSION__ >= 0x00030209 /* XENMEMF flags. */ unsigned int mem_flags; #else unsigned int address_bits; #endif /* * Domain whose reservation is being changed. * Unprivileged domains can specify only DOMID_SELF. */ domid_t domid; }; typedef struct xen_memory_reservation xen_memory_reservation_t; DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); /* * An atomic exchange of memory pages. If return code is zero then * @out.extent_list provides GMFNs of the newly-allocated memory. * Returns zero on complete success, otherwise a negative error code. * On complete success then always @nr_exchanged == @in.nr_extents. * On partial success @nr_exchanged indicates how much work was done. */ #define XENMEM_exchange 11 struct xen_memory_exchange { /* * [IN] Details of memory extents to be exchanged (GMFN bases). * Note that @in.address_bits is ignored and unused. */ struct xen_memory_reservation in; /* * [IN/OUT] Details of new memory extents. * We require that: * 1. @in.domid == @out.domid * 2. @in.nr_extents << @in.extent_order == * @out.nr_extents << @out.extent_order * 3. @in.extent_start and @out.extent_start lists must not overlap * 4. @out.extent_start lists GPFN bases to be populated * 5. @out.extent_start is overwritten with allocated GMFN bases */ struct xen_memory_reservation out; /* * [OUT] Number of input extents that were successfully exchanged: * 1. The first @nr_exchanged input extents were successfully * deallocated. * 2. The corresponding first entries in the output extent list correctly * indicate the GMFNs that were successfully exchanged. * 3. All other input and output extents are untouched. * 4. 
If not all input exents are exchanged then the return code of this * command will be non-zero. * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! */ xen_ulong_t nr_exchanged; }; typedef struct xen_memory_exchange xen_memory_exchange_t; DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); /* * Returns the maximum machine frame number of mapped RAM in this system. * This command always succeeds (it never returns an error code). * arg == NULL. */ #define XENMEM_maximum_ram_page 2 /* * Returns the current or maximum memory reservation, in pages, of the * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. * arg == addr of domid_t. */ #define XENMEM_current_reservation 3 #define XENMEM_maximum_reservation 4 /* * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. */ #define XENMEM_maximum_gpfn 14 /* * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys * mapping table. Architectures which do not have a m2p table do not implement * this command. * arg == addr of xen_machphys_mfn_list_t. */ #define XENMEM_machphys_mfn_list 5 struct xen_machphys_mfn_list { /* * Size of the 'extent_start' array. Fewer entries will be filled if the * machphys table is smaller than max_extents * 2MB. */ unsigned int max_extents; /* * Pointer to buffer to fill with list of extent starts. If there are * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller * than 'max_extents' if the machphys table is smaller than max_e * 2MB. */ unsigned int nr_extents; }; typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); /* + * For a compat caller, this is identical to XENMEM_machphys_mfn_list. 
+ * + * For a non compat caller, this functions similarly to + * XENMEM_machphys_mfn_list, but returns the mfns making up the compatibility + * m2p table. + */ +#define XENMEM_machphys_compat_mfn_list 25 + +/* * Returns the location in virtual address space of the machine_to_phys * mapping table. Architectures which do not have a m2p table, or which do not * map it by default into guest address space, do not implement this command. * arg == addr of xen_machphys_mapping_t. */ #define XENMEM_machphys_mapping 12 struct xen_machphys_mapping { xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ }; typedef struct xen_machphys_mapping xen_machphys_mapping_t; DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); +/* Source mapping space. */ +/* ` enum phys_map_space { */ #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ #define XENMAPSPACE_gmfn 2 /* GMFN */ #define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, - * XENMEM_add_to_physmap_range only. - */ + * XENMEM_add_to_physmap_batch only. */ +/* ` } */ /* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. * arg == addr of xen_add_to_physmap_t. */ #define XENMEM_add_to_physmap 7 struct xen_add_to_physmap { /* Which domain to change the mapping for. */ domid_t domid; /* Number of pages to go through for gmfn_range */ uint16_t size; - /* Source mapping space. */ -#define XENMAPSPACE_shared_info 0 /* shared info page */ -#define XENMAPSPACE_grant_table 1 /* grant table page */ -#define XENMAPSPACE_gmfn 2 /* GMFN */ -#define XENMAPSPACE_gmfn_range 3 /* GMFN range */ - unsigned int space; + unsigned int space; /* => enum phys_map_space */ #define XENMAPIDX_grant_table_status 0x80000000 - /* Index into source mapping space. 
*/ + /* Index into space being mapped. */ xen_ulong_t idx; - /* GPFN where the source mapping page should appear. */ + /* GPFN in domid where the source mapping page should appear. */ xen_pfn_t gpfn; }; typedef struct xen_add_to_physmap xen_add_to_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); -/* - * Unmaps the page appearing at a particular GPFN from the specified guest's - * pseudophysical address space. - * arg == addr of xen_remove_from_physmap_t. - */ -#define XENMEM_remove_from_physmap 15 -struct xen_remove_from_physmap { - /* Which domain to change the mapping for. */ - domid_t domid; - - /* GPFN of the current mapping of the page. */ - xen_pfn_t gpfn; -}; -typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; -DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); - -/*** REMOVED ***/ -/*#define XENMEM_translate_gpfn_list 8*/ - -#define XENMEM_add_to_physmap_range 23 -struct xen_add_to_physmap_range { +/* A batched version of add_to_physmap. */ +#define XENMEM_add_to_physmap_batch 23 +struct xen_add_to_physmap_batch { /* IN */ /* Which domain to change the mapping for. */ domid_t domid; uint16_t space; /* => enum phys_map_space */ /* Number of pages to go through */ uint16_t size; domid_t foreign_domid; /* IFF gmfn_foreign */ /* Indexes into space being mapped. */ XEN_GUEST_HANDLE(xen_ulong_t) idxs; /* GPFN in domid where the source mapping page should appear. */ XEN_GUEST_HANDLE(xen_pfn_t) gpfns; /* OUT */ /* Per index error code. 
*/ XEN_GUEST_HANDLE(int) errs; }; -typedef struct xen_add_to_physmap_range xen_add_to_physmap_range_t; +typedef struct xen_add_to_physmap_batch xen_add_to_physmap_batch_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_batch_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +#define XENMEM_add_to_physmap_range XENMEM_add_to_physmap_batch +#define xen_add_to_physmap_range xen_add_to_physmap_batch +typedef struct xen_add_to_physmap_batch xen_add_to_physmap_range_t; DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t); +#endif /* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; +typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ + +/* * Returns the pseudo-physical memory map as it was when the domain * was started (specified by XENMEM_set_memory_map). * arg == addr of xen_memory_map_t. */ #define XENMEM_memory_map 9 struct xen_memory_map { /* * On call the number of entries which can be stored in buffer. On * return the number of entries which have been stored in * buffer. */ unsigned int nr_entries; /* * Entries in the buffer are in the same format as returned by the * BIOS INT 0x15 EAX=0xE820 call. */ XEN_GUEST_HANDLE(void) buffer; }; typedef struct xen_memory_map xen_memory_map_t; DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); /* * Returns the real physical memory map. Passes the same structure as * XENMEM_memory_map. * arg == addr of xen_memory_map_t. */ #define XENMEM_machine_memory_map 10 /* * Set the pseudo-physical memory map of a domain, as returned by * XENMEM_memory_map. * arg == addr of xen_foreign_memory_map_t. 
*/ #define XENMEM_set_memory_map 13 struct xen_foreign_memory_map { domid_t domid; struct xen_memory_map map; }; typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); #define XENMEM_set_pod_target 16 #define XENMEM_get_pod_target 17 struct xen_pod_target { /* IN */ uint64_t target_pages; /* OUT */ uint64_t tot_pages; uint64_t pod_cache_pages; uint64_t pod_entries; /* IN */ domid_t domid; }; typedef struct xen_pod_target xen_pod_target_t; #if defined(__XEN__) || defined(__XEN_TOOLS__) #ifndef uint64_aligned_t #define uint64_aligned_t uint64_t #endif /* * Get the number of MFNs saved through memory sharing. * The call never fails. */ #define XENMEM_get_sharing_freed_pages 18 #define XENMEM_get_sharing_shared_pages 19 #define XENMEM_paging_op 20 #define XENMEM_paging_op_nominate 0 #define XENMEM_paging_op_evict 1 #define XENMEM_paging_op_prep 2 -#define XENMEM_access_op 21 -#define XENMEM_access_op_resume 0 - -struct xen_mem_event_op { - uint8_t op; /* XENMEM_*_op_* */ +struct xen_mem_paging_op { + uint8_t op; /* XENMEM_paging_op_* */ domid_t domain; - /* PAGING_PREP IN: buffer to immediately fill page in */ uint64_aligned_t buffer; /* Other OPs */ uint64_aligned_t gfn; /* IN: gfn of page being operated on */ }; -typedef struct xen_mem_event_op xen_mem_event_op_t; -DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); +typedef struct xen_mem_paging_op xen_mem_paging_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_paging_op_t); +#define XENMEM_access_op 21 +#define XENMEM_access_op_set_access 0 +#define XENMEM_access_op_get_access 1 +#define XENMEM_access_op_enable_emulate 2 +#define XENMEM_access_op_disable_emulate 3 + +typedef enum { + XENMEM_access_n, + XENMEM_access_r, + XENMEM_access_w, + XENMEM_access_rw, + XENMEM_access_x, + XENMEM_access_rx, + XENMEM_access_wx, + XENMEM_access_rwx, + /* + * Page starts off as r-x, but automatically + * change to r-w on a write + */ + XENMEM_access_rx2rw, + /* + * Log access: starts off 
as n, automatically + * goes to rwx, generating an event without + * pausing the vcpu + */ + XENMEM_access_n2rwx, + /* Take the domain default */ + XENMEM_access_default +} xenmem_access_t; + +struct xen_mem_access_op { + /* XENMEM_access_op_* */ + uint8_t op; + /* xenmem_access_t */ + uint8_t access; + domid_t domid; + /* + * Number of pages for set op + * Ignored on setting default access and other ops + */ + uint32_t nr; + /* + * First pfn for set op + * pfn for get op + * ~0ull is used to set and get the default access for pages + */ + uint64_aligned_t pfn; +}; +typedef struct xen_mem_access_op xen_mem_access_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); + #define XENMEM_sharing_op 22 #define XENMEM_sharing_op_nominate_gfn 0 #define XENMEM_sharing_op_nominate_gref 1 #define XENMEM_sharing_op_share 2 -#define XENMEM_sharing_op_resume 3 -#define XENMEM_sharing_op_debug_gfn 4 -#define XENMEM_sharing_op_debug_mfn 5 -#define XENMEM_sharing_op_debug_gref 6 -#define XENMEM_sharing_op_add_physmap 7 -#define XENMEM_sharing_op_audit 8 +#define XENMEM_sharing_op_debug_gfn 3 +#define XENMEM_sharing_op_debug_mfn 4 +#define XENMEM_sharing_op_debug_gref 5 +#define XENMEM_sharing_op_add_physmap 6 +#define XENMEM_sharing_op_audit 7 #define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) #define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) /* The following allows sharing of grant refs. This is useful * for sharing utilities sitting as "filters" in IO backends * (e.g. memshr + blktap(2)). 
The IO backend is only exposed * to grant references, and this allows sharing of the grefs */ #define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) #define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) #define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) #define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \ ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)) struct xen_mem_sharing_op { uint8_t op; /* XENMEM_sharing_op_* */ domid_t domain; union { struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ union { uint64_aligned_t gfn; /* IN: gfn to nominate */ uint32_t grant_ref; /* IN: grant ref to nominate */ } u; uint64_aligned_t handle; /* OUT: the handle */ } nominate; struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ uint64_aligned_t source_gfn; /* IN: the gfn of the source page */ uint64_aligned_t source_handle; /* IN: handle to the source page */ uint64_aligned_t client_gfn; /* IN: the client gfn */ uint64_aligned_t client_handle; /* IN: handle to the client page */ domid_t client_domain; /* IN: the client domain id */ } share; struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ union { uint64_aligned_t gfn; /* IN: gfn to debug */ uint64_aligned_t mfn; /* IN: mfn to debug */ uint32_t gref; /* IN: gref to debug */ } u; } debug; } u; }; typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); +/* + * Attempt to stake a claim for a domain on a quantity of pages + * of system RAM, but _not_ assign specific pageframes. Only + * arithmetic is performed so the hypercall is very fast and need + * not be preemptible, thus sidestepping time-of-check-time-of-use + * races for memory allocation. Returns 0 if the hypervisor page + * allocator has atomically and successfully claimed the requested + * number of pages, else non-zero. + * + * Any domain may have only one active claim. 
When sufficient memory + * has been allocated to resolve the claim, the claim silently expires. + * Claiming zero pages effectively resets any outstanding claim and + * is always successful. + * + * Note that a valid claim may be staked even after memory has been + * allocated for a domain. In this case, the claim is not incremental, + * i.e. if the domain's tot_pages is 3, and a claim is staked for 10, + * only 7 additional pages are claimed. + * + * Caller must be privileged or the hypercall fails. + */ +#define XENMEM_claim_pages 24 + +/* + * XENMEM_claim_pages flags - there are no flags at this time. + * The zero value is appropriate. + */ + +/* + * With some legacy devices, certain guest-physical addresses cannot safely + * be used for other purposes, e.g. to map guest RAM. This hypercall + * enumerates those regions so the toolstack can avoid using them. + */ +#define XENMEM_reserved_device_memory_map 27 +struct xen_reserved_device_memory { + xen_pfn_t start_pfn; + xen_ulong_t nr_pages; +}; +typedef struct xen_reserved_device_memory xen_reserved_device_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t); + +struct xen_reserved_device_memory_map { +#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */ + /* IN */ + uint32_t flags; + /* + * IN/OUT + * + * Gets set to the required number of entries when too low, + * signaled by error code -ERANGE. + */ + unsigned int nr_entries; + /* OUT */ + XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer; + /* IN */ + union { + struct physdev_pci_device pci; + } dev; +}; +typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t); + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ +/* + * XENMEM_get_vnumainfo used by guest to get + * vNUMA topology from hypervisor.
+ */ +#define XENMEM_get_vnumainfo 26 + +/* vNUMA node memory ranges */ +struct xen_vmemrange { + uint64_t start, end; + unsigned int flags; + unsigned int nid; +}; +typedef struct xen_vmemrange xen_vmemrange_t; +DEFINE_XEN_GUEST_HANDLE(xen_vmemrange_t); + +/* + * vNUMA topology specifies vNUMA node number, distance table, + * memory ranges and vcpu mapping provided for guests. + * XENMEM_get_vnumainfo hypercall expects to see from guest + * nr_vnodes, nr_vmemranges and nr_vcpus to indicate available memory. + * After filling guests structures, nr_vnodes, nr_vmemranges and nr_vcpus + * are copied back to guest. Domain returns expected values of nr_vnodes, + * nr_vmemranges and nr_vcpus to guest if the values were incorrect. + */ +struct xen_vnuma_topology_info { + /* IN */ + domid_t domid; + uint16_t pad; + /* IN/OUT */ + unsigned int nr_vnodes; + unsigned int nr_vcpus; + unsigned int nr_vmemranges; + /* OUT */ + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vdistance; + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vcpu_to_vnode; + union { + XEN_GUEST_HANDLE(xen_vmemrange_t) h; + uint64_t pad; + } vmemrange; +}; +typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t); + +/* Next available subop number is 28 */ + #endif /* __XEN_PUBLIC_MEMORY_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/nmi.h =================================================================== --- projects/clang370-import/sys/xen/interface/nmi.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/nmi.h (revision 288926) @@ -1,80 +1,85 @@ /****************************************************************************** * nmi.h * * NMI callback registration and reason codes.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_NMI_H__ #define __XEN_PUBLIC_NMI_H__ #include "xen.h" /* * NMI reason codes: * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. */ /* I/O-check error reported via ISA port 0x61, bit 6. */ #define _XEN_NMIREASON_io_error 0 #define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* PCI SERR reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_pci_serr 1 +#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr) +#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */ /* Parity error reported via ISA port 0x61, bit 7. */ #define _XEN_NMIREASON_parity_error 1 #define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) +#endif /* Unknown hardware-generated NMI. 
*/ #define _XEN_NMIREASON_unknown 2 #define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) /* * long nmi_op(unsigned int cmd, void *arg) * NB. All ops return zero on success, else a negative error code. */ /* * Register NMI callback for this (calling) VCPU. Currently this only makes * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. * arg == pointer to xennmi_callback structure. */ #define XENNMI_register_callback 0 struct xennmi_callback { unsigned long handler_address; unsigned long pad; }; typedef struct xennmi_callback xennmi_callback_t; DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); /* * Deregister NMI callback for this (calling) VCPU. * arg == NULL. */ #define XENNMI_unregister_callback 1 #endif /* __XEN_PUBLIC_NMI_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/physdev.h =================================================================== --- projects/clang370-import/sys/xen/interface/physdev.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/physdev.h (revision 288926) @@ -1,353 +1,387 @@ /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser */ #ifndef __XEN_PUBLIC_PHYSDEV_H__ #define __XEN_PUBLIC_PHYSDEV_H__ #include "xen.h" /* * Prototype for this hypercall is: * int physdev_op(int cmd, void *args) * @cmd == PHYSDEVOP_??? (physdev operation). * @args == Operation-specific extra arguments (NULL if none). */ /* * Notify end-of-interrupt (EOI) for the specified IRQ. * @arg == pointer to physdev_eoi structure. */ #define PHYSDEVOP_eoi 12 struct physdev_eoi { /* IN */ uint32_t irq; }; typedef struct physdev_eoi physdev_eoi_t; DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); /* * Register a shared page for the hypervisor to indicate whether the guest * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly * once the guest used this function in that the associated event channel * will automatically get unmasked. The page registered is used as a bit * array indexed by Xen's PIRQ value. */ #define PHYSDEVOP_pirq_eoi_gmfn_v1 17 /* * Register a shared page for the hypervisor to indicate whether the * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by * Xen's PIRQ value. */ #define PHYSDEVOP_pirq_eoi_gmfn_v2 28 struct physdev_pirq_eoi_gmfn { /* IN */ xen_pfn_t gmfn; }; typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); /* * Query the status of an IRQ line. 
* @arg == pointer to physdev_irq_status_query structure. */ #define PHYSDEVOP_irq_status_query 5 struct physdev_irq_status_query { /* IN */ uint32_t irq; /* OUT */ uint32_t flags; /* XENIRQSTAT_* */ }; typedef struct physdev_irq_status_query physdev_irq_status_query_t; DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ #define _XENIRQSTAT_needs_eoi (0) #define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) /* IRQ shared by multiple guests? */ #define _XENIRQSTAT_shared (1) #define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) /* * Set the current VCPU's I/O privilege level. * @arg == pointer to physdev_set_iopl structure. */ #define PHYSDEVOP_set_iopl 6 struct physdev_set_iopl { /* IN */ uint32_t iopl; }; typedef struct physdev_set_iopl physdev_set_iopl_t; DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); /* * Set the current VCPU's I/O-port permissions bitmap. * @arg == pointer to physdev_set_iobitmap structure. */ #define PHYSDEVOP_set_iobitmap 7 struct physdev_set_iobitmap { /* IN */ #if __XEN_INTERFACE_VERSION__ >= 0x00030205 XEN_GUEST_HANDLE(uint8) bitmap; #else uint8_t *bitmap; #endif uint32_t nr_ports; }; typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); /* * Read or write an IO-APIC register. * @arg == pointer to physdev_apic structure. */ #define PHYSDEVOP_apic_read 8 #define PHYSDEVOP_apic_write 9 struct physdev_apic { /* IN */ unsigned long apic_physbase; uint32_t reg; /* IN or OUT */ uint32_t value; }; typedef struct physdev_apic physdev_apic_t; DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); /* * Allocate or free a physical upcall vector for the specified IRQ line. * @arg == pointer to physdev_irq structure. 
*/ #define PHYSDEVOP_alloc_irq_vector 10 #define PHYSDEVOP_free_irq_vector 11 struct physdev_irq { /* IN */ uint32_t irq; /* IN or OUT */ uint32_t vector; }; typedef struct physdev_irq physdev_irq_t; DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 #define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define MAP_PIRQ_TYPE_MULTI_MSI 0x4 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { domid_t domid; /* IN */ int type; - /* IN */ + /* IN (ignored for ..._MULTI_MSI) */ int index; /* IN or OUT */ int pirq; - /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ + /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */ int bus; /* IN */ int devfn; - /* IN */ + /* IN (also OUT for ..._MULTI_MSI) */ int entry_nr; /* IN */ uint64_t table_base; }; typedef struct physdev_map_pirq physdev_map_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); #define PHYSDEVOP_unmap_pirq 14 struct physdev_unmap_pirq { domid_t domid; /* IN */ int pirq; }; typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); #define PHYSDEVOP_manage_pci_add 15 #define PHYSDEVOP_manage_pci_remove 16 struct physdev_manage_pci { /* IN */ uint8_t bus; uint8_t devfn; }; typedef struct physdev_manage_pci physdev_manage_pci_t; DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); #define PHYSDEVOP_restore_msi 19 struct physdev_restore_msi { /* IN */ uint8_t bus; uint8_t devfn; }; typedef struct physdev_restore_msi physdev_restore_msi_t; DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); #define PHYSDEVOP_manage_pci_add_ext 20 struct physdev_manage_pci_ext { /* IN */ uint8_t bus; uint8_t devfn; unsigned is_extfn; unsigned is_virtfn; struct { uint8_t bus; uint8_t devfn; } physfn; }; typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); /* * Argument to physdev_op_compat() hypercall. 
Superseded by new physdev_op() * hypercall since 0x00030202. */ struct physdev_op { uint32_t cmd; union { struct physdev_irq_status_query irq_status_query; struct physdev_set_iopl set_iopl; struct physdev_set_iobitmap set_iobitmap; struct physdev_apic apic_op; struct physdev_irq irq_op; } u; }; typedef struct physdev_op physdev_op_t; DEFINE_XEN_GUEST_HANDLE(physdev_op_t); #define PHYSDEVOP_setup_gsi 21 struct physdev_setup_gsi { int gsi; /* IN */ uint8_t triggering; /* IN */ uint8_t polarity; /* IN */ }; typedef struct physdev_setup_gsi physdev_setup_gsi_t; DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); /* leave PHYSDEVOP 22 free */ /* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI * the hypercall returns a free pirq */ #define PHYSDEVOP_get_free_pirq 23 struct physdev_get_free_pirq { /* IN */ int type; /* OUT */ uint32_t pirq; }; typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); #define XEN_PCI_MMCFG_RESERVED 0x1 #define PHYSDEVOP_pci_mmcfg_reserved 24 struct physdev_pci_mmcfg_reserved { uint64_t address; uint16_t segment; uint8_t start_bus; uint8_t end_bus; uint32_t flags; }; typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); #define XEN_PCI_DEV_EXTFN 0x1 #define XEN_PCI_DEV_VIRTFN 0x2 #define XEN_PCI_DEV_PXM 0x4 #define PHYSDEVOP_pci_device_add 25 struct physdev_pci_device_add { /* IN */ uint16_t seg; uint8_t bus; uint8_t devfn; uint32_t flags; struct { uint8_t bus; uint8_t devfn; } physfn; + /* + * Optional parameters array.
+ * First element ([0]) is PXM domain associated with the device (if + * XEN_PCI_DEV_PXM is set) + */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L uint32_t optarr[]; #elif defined(__GNUC__) uint32_t optarr[0]; #endif }; typedef struct physdev_pci_device_add physdev_pci_device_add_t; DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); #define PHYSDEVOP_pci_device_remove 26 #define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. + */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 struct physdev_pci_device { /* IN */ uint16_t seg; uint8_t bus; uint8_t devfn; }; typedef struct physdev_pci_device physdev_pci_device_t; DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); +#define PHYSDEVOP_DBGP_RESET_PREPARE 1 +#define PHYSDEVOP_DBGP_RESET_DONE 2 + +#define PHYSDEVOP_DBGP_BUS_UNKNOWN 0 +#define PHYSDEVOP_DBGP_BUS_PCI 1 + +#define PHYSDEVOP_dbgp_op 29 +struct physdev_dbgp_op { + /* IN */ + uint8_t op; + uint8_t bus; + union { + struct physdev_pci_device pci; + } u; +}; +typedef struct physdev_dbgp_op physdev_dbgp_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** * ** unsupported by newer versions of Xen. ** */ #define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 +#if __XEN_INTERFACE_VERSION__ < 0x00040600 /* * These all-capitals physdev operation names are superseded by the new names - * (defined above) since interface version 0x00030202. + * (defined above) since interface version 0x00030202. The guard above was + * added post-4.5 only though and hence shouldn't check for 0x00030202.
*/ #define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query #define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl #define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap #define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read #define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write #define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector #define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi #define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#endif #if __XEN_INTERFACE_VERSION__ < 0x00040200 #define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 #else #define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 #endif #endif /* __XEN_PUBLIC_PHYSDEV_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/platform.h =================================================================== --- projects/clang370-import/sys/xen/interface/platform.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/platform.h (revision 288926) @@ -1,549 +1,657 @@ /****************************************************************************** * platform.h * * Hardware platform operations. Intended for use by domain-0 kernel. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2002-2006, K Fraser */ #ifndef __XEN_PUBLIC_PLATFORM_H__ #define __XEN_PUBLIC_PLATFORM_H__ #include "xen.h" #define XENPF_INTERFACE_VERSION 0x03000001 /* * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, * 1 January, 1970 if the current system time was <system_time>. */ -#define XENPF_settime 17 -struct xenpf_settime { +#define XENPF_settime32 17 +struct xenpf_settime32 { /* IN variables. */ uint32_t secs; uint32_t nsecs; uint64_t system_time; }; +#define XENPF_settime64 62 +struct xenpf_settime64 { + /* IN variables. */ + uint64_t secs; + uint32_t nsecs; + uint32_t mbz; + uint64_t system_time; +}; +#if __XEN_INTERFACE_VERSION__ < 0x00040600 +#define XENPF_settime XENPF_settime32 +#define xenpf_settime xenpf_settime32 +#else +#define XENPF_settime XENPF_settime64 +#define xenpf_settime xenpf_settime64 +#endif typedef struct xenpf_settime xenpf_settime_t; DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); /* * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. * On x86, @type is an architecture-defined MTRR memory type. * On success, returns the MTRR that was used (@reg) and a handle that can * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. * (x86-specific). */ #define XENPF_add_memtype 31 struct xenpf_add_memtype { /* IN variables. */ xen_pfn_t mfn; uint64_t nr_mfns; uint32_t type; /* OUT variables.
*/ uint32_t handle; uint32_t reg; }; typedef struct xenpf_add_memtype xenpf_add_memtype_t; DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); /* * Tear down an existing memory-range type. If @handle is remembered then it * should be passed in to accurately tear down the correct setting (in case * of overlapping memory regions with differing types). If it is not known * then @handle should be set to zero. In all cases @reg must be set. * (x86-specific). */ #define XENPF_del_memtype 32 struct xenpf_del_memtype { /* IN variables. */ uint32_t handle; uint32_t reg; }; typedef struct xenpf_del_memtype xenpf_del_memtype_t; DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); /* Read current type of an MTRR (x86-specific). */ #define XENPF_read_memtype 33 struct xenpf_read_memtype { /* IN variables. */ uint32_t reg; /* OUT variables. */ xen_pfn_t mfn; uint64_t nr_mfns; uint32_t type; }; typedef struct xenpf_read_memtype xenpf_read_memtype_t; DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); #define XENPF_microcode_update 35 struct xenpf_microcode_update { /* IN variables. */ XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ uint32_t length; /* Length of microcode data. */ }; typedef struct xenpf_microcode_update xenpf_microcode_update_t; DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); #define XENPF_platform_quirk 39 #define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ #define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ #define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ struct xenpf_platform_quirk { /* IN variables. 
*/ uint32_t quirk_id; }; typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); #define XENPF_efi_runtime_call 49 #define XEN_EFI_get_time 1 #define XEN_EFI_set_time 2 #define XEN_EFI_get_wakeup_time 3 #define XEN_EFI_set_wakeup_time 4 #define XEN_EFI_get_next_high_monotonic_count 5 #define XEN_EFI_get_variable 6 #define XEN_EFI_set_variable 7 #define XEN_EFI_get_next_variable_name 8 #define XEN_EFI_query_variable_info 9 #define XEN_EFI_query_capsule_capabilities 10 #define XEN_EFI_update_capsule 11 + +struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; +}; + +struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +}; + struct xenpf_efi_runtime_call { uint32_t function; /* * This field is generally used for per sub-function flags (defined * below), except for the XEN_EFI_get_next_high_monotonic_count case, * where it holds the single returned value. 
*/ uint32_t misc; - unsigned long status; + xen_ulong_t status; union { #define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 struct { - struct xenpf_efi_time { - uint16_t year; - uint8_t month; - uint8_t day; - uint8_t hour; - uint8_t min; - uint8_t sec; - uint32_t ns; - int16_t tz; - uint8_t daylight; - } time; + struct xenpf_efi_time time; uint32_t resolution; uint32_t accuracy; } get_time; struct xenpf_efi_time set_time; #define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 #define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 struct xenpf_efi_time get_wakeup_time; #define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 #define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 struct xenpf_efi_time set_wakeup_time; #define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 #define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 #define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 struct { XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ - unsigned long size; + xen_ulong_t size; XEN_GUEST_HANDLE(void) data; - struct xenpf_efi_guid { - uint32_t data1; - uint16_t data2; - uint16_t data3; - uint8_t data4[8]; - } vendor_guid; + struct xenpf_efi_guid vendor_guid; } get_variable, set_variable; struct { - unsigned long size; + xen_ulong_t size; XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ struct xenpf_efi_guid vendor_guid; } get_next_variable_name; +#define XEN_EFI_VARINFO_BOOT_SNAPSHOT 0x00000001 struct { uint32_t attr; uint64_t max_store_size; uint64_t remain_store_size; uint64_t max_size; } query_variable_info; struct { XEN_GUEST_HANDLE(void) capsule_header_array; - unsigned long capsule_count; + xen_ulong_t capsule_count; uint64_t max_capsule_size; - unsigned int reset_type; + uint32_t reset_type; } query_capsule_capabilities; struct { XEN_GUEST_HANDLE(void) capsule_header_array; - unsigned long capsule_count; + xen_ulong_t capsule_count; uint64_t sg_list; /* machine address */ } update_capsule; } u; }; typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t; 
DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); #define XENPF_firmware_info 50 #define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ #define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ #define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ #define XEN_FW_EFI_INFO 4 /* from EFI */ #define XEN_FW_EFI_VERSION 0 #define XEN_FW_EFI_CONFIG_TABLE 1 #define XEN_FW_EFI_VENDOR 2 #define XEN_FW_EFI_MEM_INFO 3 #define XEN_FW_EFI_RT_VERSION 4 +#define XEN_FW_EFI_PCI_ROM 5 +#define XEN_FW_KBD_SHIFT_FLAGS 5 struct xenpf_firmware_info { /* IN variables. */ uint32_t type; uint32_t index; /* OUT variables. */ union { struct { /* Int13, Fn48: Check Extensions Present. */ uint8_t device; /* %dl: bios device number */ uint8_t version; /* %ah: major version */ uint16_t interface_support; /* %cx: support bitmap */ /* Int13, Fn08: Legacy Get Device Parameters. */ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ uint8_t legacy_max_head; /* %dh: max head # */ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ /* NB. First uint16_t of buffer must be set to buffer size. */ XEN_GUEST_HANDLE(void) edd_params; } disk_info; /* XEN_FW_DISK_INFO */ struct { uint8_t device; /* bios device number */ uint32_t mbr_signature; /* offset 0x1b8 in mbr */ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ struct { /* Int10, AX=4F15: Get EDID info. 
*/ uint8_t capabilities; uint8_t edid_transfer_time; /* must refer to 128-byte buffer */ XEN_GUEST_HANDLE(uint8) edid; } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ union xenpf_efi_info { uint32_t version; struct { uint64_t addr; /* EFI_CONFIGURATION_TABLE */ uint32_t nent; } cfg; struct { uint32_t revision; uint32_t bufsz; /* input, in bytes */ XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ } vendor; struct { uint64_t addr; uint64_t size; uint64_t attr; uint32_t type; } mem; + struct { + /* IN variables */ + uint16_t segment; + uint8_t bus; + uint8_t devfn; + uint16_t vendor; + uint16_t devid; + /* OUT variables */ + uint64_t address; + xen_ulong_t size; + } pci_rom; } efi_info; /* XEN_FW_EFI_INFO */ + + /* Int16, Fn02: Get keyboard shift flags. */ + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ } u; }; typedef struct xenpf_firmware_info xenpf_firmware_info_t; DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); #define XENPF_enter_acpi_sleep 51 struct xenpf_enter_acpi_sleep { /* IN variables */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 uint16_t pm1a_cnt_val; /* PM1a control value. */ uint16_t pm1b_cnt_val; /* PM1b control value. */ +#else + uint16_t val_a; /* PM1a control / sleep type A. */ + uint16_t val_b; /* PM1b control / sleep type B. */ +#endif uint32_t sleep_state; /* Which state to enter (Sn). */ - uint32_t flags; /* Must be zero. */ +#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001 + uint32_t flags; /* XENPF_ACPI_SLEEP_*. */ }; typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); #define XENPF_change_freq 52 struct xenpf_change_freq { /* IN variables */ uint32_t flags; /* Must be zero. */ uint32_t cpu; /* Physical cpu. */ uint64_t freq; /* New frequency (Hz). 
*/ }; typedef struct xenpf_change_freq xenpf_change_freq_t; DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); /* * Get idle times (nanoseconds since boot) for physical CPUs specified in the * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is * indexed by CPU number; only entries with the corresponding @cpumap_bitmap * bit set are written to. On return, @cpumap_bitmap is modified so that any * non-existent CPUs are cleared. Such CPUs have their @idletime array entry * cleared. */ #define XENPF_getidletime 53 struct xenpf_getidletime { /* IN/OUT variables */ /* IN: CPUs to interrogate; OUT: subset of IN which are present */ XEN_GUEST_HANDLE(uint8) cpumap_bitmap; /* IN variables */ /* Size of cpumap bitmap. */ uint32_t cpumap_nr_cpus; /* Must be indexable for every cpu in cpumap_bitmap. */ XEN_GUEST_HANDLE(uint64) idletime; /* OUT variables */ /* System time when the idletime snapshots were taken. */ uint64_t now; }; typedef struct xenpf_getidletime xenpf_getidletime_t; DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); #define XENPF_set_processor_pminfo 54 /* ability bits */ #define XEN_PROCESSOR_PM_CX 1 #define XEN_PROCESSOR_PM_PX 2 #define XEN_PROCESSOR_PM_TX 4 /* cmd type */ #define XEN_PM_CX 0 #define XEN_PM_PX 1 #define XEN_PM_TX 2 #define XEN_PM_PDC 3 /* Px sub info type */ #define XEN_PX_PCT 1 #define XEN_PX_PSS 2 #define XEN_PX_PPC 4 #define XEN_PX_PSD 8 struct xen_power_register { uint32_t space_id; uint32_t bit_width; uint32_t bit_offset; uint32_t access_size; uint64_t address; }; struct xen_processor_csd { uint32_t domain; /* domain number of one dependent group */ uint32_t coord_type; /* coordination type */ uint32_t num; /* number of processors in same domain */ }; typedef struct xen_processor_csd xen_processor_csd_t; DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); struct xen_processor_cx { struct xen_power_register reg; /* GAS for Cx trigger register */ uint8_t type; /* cstate value, c0: 0, c1: 1, ... 
*/ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ uint32_t power; /* average power consumption(mW) */ uint32_t dpcnt; /* number of dependency entries */ XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ }; typedef struct xen_processor_cx xen_processor_cx_t; DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); struct xen_processor_flags { uint32_t bm_control:1; uint32_t bm_check:1; uint32_t has_cst:1; uint32_t power_setup_done:1; uint32_t bm_rld_set:1; }; struct xen_processor_power { uint32_t count; /* number of C state entries in array below */ struct xen_processor_flags flags; /* global flags of this processor */ XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ }; struct xen_pct_register { uint8_t descriptor; uint16_t length; uint8_t space_id; uint8_t bit_width; uint8_t bit_offset; uint8_t reserved; uint64_t address; }; struct xen_processor_px { uint64_t core_frequency; /* megahertz */ uint64_t power; /* milliWatts */ uint64_t transition_latency; /* microseconds */ uint64_t bus_master_latency; /* microseconds */ uint64_t control; /* control value */ uint64_t status; /* success indicator */ }; typedef struct xen_processor_px xen_processor_px_t; DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t); struct xen_psd_package { uint64_t num_entries; uint64_t revision; uint64_t domain; uint64_t coord_type; uint64_t num_processors; }; struct xen_processor_performance { uint32_t flags; /* flag for Px sub info type */ uint32_t platform_limit; /* Platform limitation on freq usage */ struct xen_pct_register control_register; struct xen_pct_register status_register; uint32_t state_count; /* total available performance states */ XEN_GUEST_HANDLE(xen_processor_px_t) states; struct xen_psd_package domain_info; uint32_t shared_type; /* coordination type of this processor */ }; typedef struct xen_processor_performance xen_processor_performance_t; DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t); struct xenpf_set_processor_pminfo { /* 
IN variables */ uint32_t id; /* ACPI CPU ID */ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ union { struct xen_processor_power power;/* Cx: _CST/_CSD */ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ } u; }; typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); #define XENPF_get_cpuinfo 55 struct xenpf_pcpuinfo { /* IN */ uint32_t xen_cpuid; /* OUT */ /* The maximum cpu_id that is present */ uint32_t max_present; #define XEN_PCPU_FLAGS_ONLINE 1 /* Corresponding xen_cpuid is not present*/ #define XEN_PCPU_FLAGS_INVALID 2 uint32_t flags; uint32_t apic_id; uint32_t acpi_id; }; typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); #define XENPF_get_cpu_version 48 struct xenpf_pcpu_version { /* IN */ uint32_t xen_cpuid; /* OUT */ /* The maximum cpu_id that is present */ uint32_t max_present; char vendor_id[12]; uint32_t family; uint32_t model; uint32_t stepping; }; typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); #define XENPF_cpu_online 56 #define XENPF_cpu_offline 57 struct xenpf_cpu_ol { uint32_t cpuid; }; typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); #define XENPF_cpu_hotadd 58 struct xenpf_cpu_hotadd { uint32_t apic_id; uint32_t acpi_id; uint32_t pxm; }; #define XENPF_mem_hotadd 59 struct xenpf_mem_hotadd { uint64_t spfn; uint64_t epfn; uint32_t pxm; uint32_t flags; }; #define XENPF_core_parking 60 #define XEN_CORE_PARKING_SET 1 #define XEN_CORE_PARKING_GET 2 struct xenpf_core_parking { /* IN variables */ uint32_t type; /* IN variables: set cpu nums expected to be idled */ /* OUT variables: get cpu nums actually be idled */ uint32_t idle_nums; }; typedef struct xenpf_core_parking xenpf_core_parking_t; DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); /* + * Access generic platform resources(e.g., accessing MSR,
port I/O, etc) + * in unified way. Batch resource operations in one call are supported and + * they are always non-preemptible and executed in their original order. + * The batch itself returns a negative integer for general errors, or a + * non-negative integer for the number of successful operations. For the latter + * case, the @ret in the failed entry (if any) indicates the exact error. + */ +#define XENPF_resource_op 61 + +#define XEN_RESOURCE_OP_MSR_READ 0 +#define XEN_RESOURCE_OP_MSR_WRITE 1 + +/* + * Specially handled MSRs: + * - MSR_IA32_TSC + * READ: Returns the scaled system time(ns) instead of raw timestamp. In + * multiple entry case, if other MSR read is followed by a MSR_IA32_TSC + * read, then both reads are guaranteed to be performed atomically (with + * IRQ disabled). The return time indicates the point of reading that MSR. + * WRITE: Not supported. + */ + +struct xenpf_resource_entry { + union { + uint32_t cmd; /* IN: XEN_RESOURCE_OP_* */ + int32_t ret; /* OUT: return value for failed entry */ + } u; + uint32_t rsvd; /* IN: padding and must be zero */ + uint64_t idx; /* IN: resource address to access */ + uint64_t val; /* IN/OUT: resource value to set/get */ +}; +typedef struct xenpf_resource_entry xenpf_resource_entry_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_resource_entry_t); + +struct xenpf_resource_op { + uint32_t nr_entries; /* number of resource entry */ + uint32_t cpu; /* which cpu to run */ + XEN_GUEST_HANDLE(xenpf_resource_entry_t) entries; +}; +typedef struct xenpf_resource_op xenpf_resource_op_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_resource_op_t); + +#define XENPF_get_symbol 63 +struct xenpf_symdata { + /* IN/OUT variables */ + uint32_t namelen; /* IN: size of name buffer */ + /* OUT: strlen(name) of hypervisor symbol (may be */ + /* larger than what's been copied to guest) */ + uint32_t symnum; /* IN: Symbol to read */ + /* OUT: Next available symbol. 
If same as IN then */ + /* we reached the end */ + + /* OUT variables */ + XEN_GUEST_HANDLE(char) name; + uint64_t address; + char type; +}; +typedef struct xenpf_symdata xenpf_symdata_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t); + +/* * ` enum neg_errnoval * ` HYPERVISOR_platform_op(const struct xen_platform_op*); */ struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ union { struct xenpf_settime settime; + struct xenpf_settime32 settime32; + struct xenpf_settime64 settime64; struct xenpf_add_memtype add_memtype; struct xenpf_del_memtype del_memtype; struct xenpf_read_memtype read_memtype; struct xenpf_microcode_update microcode; struct xenpf_platform_quirk platform_quirk; struct xenpf_efi_runtime_call efi_runtime_call; struct xenpf_firmware_info firmware_info; struct xenpf_enter_acpi_sleep enter_acpi_sleep; struct xenpf_change_freq change_freq; struct xenpf_getidletime getidletime; struct xenpf_set_processor_pminfo set_pminfo; struct xenpf_pcpuinfo pcpu_info; struct xenpf_pcpu_version pcpu_version; struct xenpf_cpu_ol cpu_ol; struct xenpf_cpu_hotadd cpu_add; struct xenpf_mem_hotadd mem_add; struct xenpf_core_parking core_parking; + struct xenpf_resource_op resource_op; + struct xenpf_symdata symdata; uint8_t pad[128]; } u; }; typedef struct xen_platform_op xen_platform_op_t; DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); #endif /* __XEN_PUBLIC_PLATFORM_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/pmu.h =================================================================== --- projects/clang370-import/sys/xen/interface/pmu.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/pmu.h (revision 288926) @@ -0,0 +1,133 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files 
(the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2015 Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef __XEN_PUBLIC_PMU_H__ +#define __XEN_PUBLIC_PMU_H__ + +#include "xen.h" +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/pmu.h" +#elif defined (__arm__) || defined (__aarch64__) +#include "arch-arm.h" +#else +#error "Unsupported architecture" +#endif + +#define XENPMU_VER_MAJ 0 +#define XENPMU_VER_MIN 1 + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args); + * + * @cmd == XENPMU_* (PMU operation) + * @args == struct xenpmu_params + */ +/* ` enum xenpmu_op { */ +#define XENPMU_mode_get 0 /* Also used for getting PMU version */ +#define XENPMU_mode_set 1 +#define XENPMU_feature_get 2 +#define XENPMU_feature_set 3 +#define XENPMU_init 4 +#define XENPMU_finish 5 +#define XENPMU_lvtpc_set 6 +#define XENPMU_flush 7 /* Write cached MSR values to HW */ +/* ` } */ + +/* Parameters structure for HYPERVISOR_xenpmu_op call */ +struct xen_pmu_params { + /* IN/OUT parameters */ + struct { + uint32_t maj; + uint32_t min; + } version; + uint64_t val; + + /* IN parameters */ + uint32_t vcpu; + uint32_t pad; +}; +typedef struct xen_pmu_params xen_pmu_params_t; +DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t); + +/* PMU modes: + * - XENPMU_MODE_OFF: No PMU virtualization + * - XENPMU_MODE_SELF: Guests can profile themselves + * - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles + * itself and Xen + * - XENPMU_MODE_ALL: Only dom0 has access to VPMU and it profiles + * everyone: itself, the hypervisor and the guests. + */ +#define XENPMU_MODE_OFF 0 +#define XENPMU_MODE_SELF (1<<0) +#define XENPMU_MODE_HV (1<<1) +#define XENPMU_MODE_ALL (1<<2) + +/* + * PMU features: + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD) + */ +#define XENPMU_FEATURE_INTEL_BTS 1 + +/* + * Shared PMU data between hypervisor and PV(H) domains. + * + * The hypervisor fills out this structure during PMU interrupt and sends an + * interrupt to appropriate VCPU. 
+ * Architecture-independent fields of xen_pmu_data are WO for the hypervisor + * and RO for the guest but some fields in xen_pmu_arch can be writable + * by both the hypervisor and the guest (see arch-$arch/pmu.h). + */ +struct xen_pmu_data { + /* Interrupted VCPU */ + uint32_t vcpu_id; + + /* + * Physical processor on which the interrupt occurred. On non-privileged + * guests set to vcpu_id; + */ + uint32_t pcpu_id; + + /* + * Domain that was interrupted. On non-privileged guests set to DOMID_SELF. + * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in + * XENPMU_MODE_ALL mode, domain ID of another domain. + */ + domid_t domain_id; + + uint8_t pad[6]; + + /* Architecture-specific information */ + struct xen_pmu_arch pmu; +}; + +#endif /* __XEN_PUBLIC_PMU_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: projects/clang370-import/sys/xen/interface/pmu.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/sched.h =================================================================== --- projects/clang370-import/sys/xen/interface/sched.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/sched.h (revision 288926) @@ -1,145 +1,175 @@ /****************************************************************************** * sched.h - * + * * Scheduler state interactions - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the 
following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_SCHED_H__ #define __XEN_PUBLIC_SCHED_H__ #include "event_channel.h" /* + * `incontents 150 sched Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* * The prototype for this hypercall is: - * long sched_op(int cmd, void *arg) + * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * * @cmd == SCHEDOP_??? (scheduler operation). * @arg == Operation-specific extra argument(s), as described below. - * + * ... == Additional Operation-specific extra arguments, described below. + * * Versions of Xen prior to 3.0.2 provided only the following legacy version * of this hypercall, supporting only the commands yield, block and shutdown: * long sched_op(int cmd, unsigned long arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) * == SHUTDOWN_* code (SCHEDOP_shutdown) - * This legacy version is available to new guests as sched_op_compat(). + * + * This legacy version is available to new guests as: + * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) */ +/* ` enum sched_op { // SCHEDOP_* => struct sched_* */ /* * Voluntarily yield the CPU. * @arg == NULL. 
*/ #define SCHEDOP_yield 0 /* * Block execution of this VCPU until an event is received for processing. * If called with event upcalls masked, this operation will atomically * reenable event delivery and check for pending events before blocking the * VCPU. This avoids a "wakeup waiting" race. * @arg == NULL. */ #define SCHEDOP_block 1 /* * Halt execution of this domain (all VCPUs) and notify the system controller. - * @arg == pointer to sched_shutdown structure. + * @arg == pointer to sched_shutdown_t structure. + * + * If the sched_shutdown_t reason is SHUTDOWN_suspend then + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN + * of the guest's start info page. RDX/EDX is the third hypercall + * argument. + * + * In addition, when the reason is SHUTDOWN_suspend this hypercall + * returns 1 if suspend was cancelled or the domain was merely + * checkpointed, and 0 if it is resuming in a new domain. */ #define SCHEDOP_shutdown 2 -struct sched_shutdown { - unsigned int reason; /* SHUTDOWN_* */ -}; -typedef struct sched_shutdown sched_shutdown_t; -DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); /* * Poll a set of event-channel ports. Return when one or more are pending. An * optional timeout may be specified. - * @arg == pointer to sched_poll structure. + * @arg == pointer to sched_poll_t structure. */ #define SCHEDOP_poll 3 -struct sched_poll { - XEN_GUEST_HANDLE(evtchn_port_t) ports; - unsigned int nr_ports; - uint64_t timeout; -}; -typedef struct sched_poll sched_poll_t; -DEFINE_XEN_GUEST_HANDLE(sched_poll_t); /* * Declare a shutdown for another domain. The main use of this function is * in interpreting shutdown requests and reasons for fully-virtualized * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. - * @arg == pointer to sched_remote_shutdown structure. + * @arg == pointer to sched_remote_shutdown_t structure.
*/ #define SCHEDOP_remote_shutdown 4 -struct sched_remote_shutdown { - domid_t domain_id; /* Remote domain ID */ - unsigned int reason; /* SHUTDOWN_xxx reason */ -}; -typedef struct sched_remote_shutdown sched_remote_shutdown_t; -DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); /* * Latch a shutdown code, so that when the domain later shuts down it * reports this code to the control tools. - * @arg == as for SCHEDOP_shutdown. + * @arg == sched_shutdown_t, as for SCHEDOP_shutdown. */ #define SCHEDOP_shutdown_code 5 /* * Setup, poke and destroy a domain watchdog timer. - * @arg == pointer to sched_watchdog structure. + * @arg == pointer to sched_watchdog_t structure. * With id == 0, setup a domain watchdog timer to cause domain shutdown * after timeout, returns watchdog id. * With id != 0 and timeout == 0, destroy domain watchdog timer. * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. */ #define SCHEDOP_watchdog 6 +/* ` } */ + +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); + +struct sched_poll { + XEN_GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + struct sched_watchdog { uint32_t id; /* watchdog ID */ uint32_t timeout; /* timeout */ }; typedef struct sched_watchdog sched_watchdog_t; DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); /* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. 
*/ +/* ` enum sched_shutdown_reason { */ #define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ #define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +#define SHUTDOWN_MAX 4 /* Maximum valid shutdown reason. */ +/* ` } */ #endif /* __XEN_PUBLIC_SCHED_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/sysctl.h =================================================================== --- projects/clang370-import/sys/xen/interface/sysctl.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/sysctl.h (revision 288926) @@ -1,655 +1,833 @@ /****************************************************************************** * sysctl.h * * System management operations. For use by node control stack. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2002-2006, K Fraser */ #ifndef __XEN_PUBLIC_SYSCTL_H__ #define __XEN_PUBLIC_SYSCTL_H__ #if !defined(__XEN__) && !defined(__XEN_TOOLS__) #error "sysctl operations are intended for use by node control tools only" #endif #include "xen.h" #include "domctl.h" +#include "physdev.h" +#include "tmem.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000009 +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000C /* * Read console content from Xen buffer ring. */ /* XEN_SYSCTL_readconsole */ struct xen_sysctl_readconsole { /* IN: Non-zero -> clear after reading. */ uint8_t clear; /* IN: Non-zero -> start index specified by @index field. */ uint8_t incremental; uint8_t pad0, pad1; /* * IN: Start index for consuming from ring buffer (if @incremental); * OUT: End index after consuming from ring buffer. */ uint32_t index; /* IN: Virtual address to write console data. */ XEN_GUEST_HANDLE_64(char) buffer; /* IN: Size of buffer; OUT: Bytes written to buffer. */ uint32_t count; }; typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); /* Get trace buffers machine base address */ /* XEN_SYSCTL_tbuf_op */ struct xen_sysctl_tbuf_op { /* IN variables */ #define XEN_SYSCTL_TBUFOP_get_info 0 #define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 #define XEN_SYSCTL_TBUFOP_set_evt_mask 2 #define XEN_SYSCTL_TBUFOP_set_size 3 #define XEN_SYSCTL_TBUFOP_enable 4 #define XEN_SYSCTL_TBUFOP_disable 5 uint32_t cmd; /* IN/OUT variables */ - struct xenctl_cpumap cpu_mask; + struct xenctl_bitmap cpu_mask; uint32_t evt_mask; /* OUT variables */ uint64_aligned_t buffer_mfn; uint32_t size; /* Also an IN variable! 
*/ }; typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); /* * Get physical information about the host machine */ /* XEN_SYSCTL_physinfo */ /* (x86) The platform supports HVM guests. */ #define _XEN_SYSCTL_PHYSCAP_hvm 0 #define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) /* (x86) The platform supports HVM-guest direct access to I/O devices. */ #define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 #define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) struct xen_sysctl_physinfo { uint32_t threads_per_core; uint32_t cores_per_socket; uint32_t nr_cpus; /* # CPUs currently online */ uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ uint32_t nr_nodes; /* # nodes currently online */ uint32_t max_node_id; /* Largest possible node ID on this host */ uint32_t cpu_khz; uint64_aligned_t total_pages; uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; + uint64_aligned_t outstanding_pages; uint32_t hw_cap[8]; /* XEN_SYSCTL_PHYSCAP_??? */ uint32_t capabilities; }; typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); /* * Get the ID of the current scheduler. */ /* XEN_SYSCTL_sched_id */ struct xen_sysctl_sched_id { /* OUT variable */ uint32_t sched_id; }; typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); /* Interface for controlling Xen software performance counters. */ /* XEN_SYSCTL_perfc_op */ /* Sub-operations: */ #define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ #define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. 
*/ struct xen_sysctl_perfc_desc { char name[80]; /* name of perf counter */ uint32_t nr_vals; /* number of values for this counter */ }; typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); typedef uint32_t xen_sysctl_perfc_val_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); struct xen_sysctl_perfc_op { /* IN variables. */ uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ /* OUT variables. */ uint32_t nr_counters; /* number of counters description */ uint32_t nr_vals; /* number of values */ /* counter information (or NULL) */ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; /* counter values (or NULL) */ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; }; typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); /* XEN_SYSCTL_getdomaininfolist */ struct xen_sysctl_getdomaininfolist { /* IN variables. */ domid_t first_domain; uint32_t max_domains; XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; /* OUT variables. */ uint32_t num_domains; }; typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); /* Inject debug keys into Xen. */ /* XEN_SYSCTL_debug_keys */ struct xen_sysctl_debug_keys { /* IN variables. */ XEN_GUEST_HANDLE_64(char) keys; uint32_t nr_keys; }; typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); /* Get physical CPU information. */ /* XEN_SYSCTL_getcpuinfo */ struct xen_sysctl_cpuinfo { uint64_aligned_t idletime; }; typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); struct xen_sysctl_getcpuinfo { /* IN variables. */ uint32_t max_cpus; XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; /* OUT variables. 
*/ uint32_t nr_cpus; }; typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); /* XEN_SYSCTL_availheap */ struct xen_sysctl_availheap { /* IN variables. */ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ int32_t node; /* NUMA node of interest (-1 for all nodes). */ /* OUT variables. */ uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */ }; typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); /* XEN_SYSCTL_get_pmstat */ struct pm_px_val { uint64_aligned_t freq; /* Px core frequency */ uint64_aligned_t residency; /* Px residency time */ uint64_aligned_t count; /* Px transition count */ }; typedef struct pm_px_val pm_px_val_t; DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); struct pm_px_stat { uint8_t total; /* total Px states */ uint8_t usable; /* usable Px states */ uint8_t last; /* last Px state */ uint8_t cur; /* current Px state */ XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ XEN_GUEST_HANDLE_64(pm_px_val_t) pt; }; typedef struct pm_px_stat pm_px_stat_t; DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); struct pm_cx_stat { uint32_t nr; /* entry nr in triggers & residencies, including C0 */ uint32_t last; /* last Cx state */ uint64_aligned_t idle_time; /* idle time from boot */ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ - uint64_aligned_t pc2; - uint64_aligned_t pc3; - uint64_aligned_t pc6; - uint64_aligned_t pc7; - uint64_aligned_t cc3; - uint64_aligned_t cc6; - uint64_aligned_t cc7; + uint32_t nr_pc; /* entry nr in pc[] */ + uint32_t nr_cc; /* entry nr in cc[] */ + /* + * These two arrays may (and generally will) have unused slots; slots not + * having a corresponding hardware register will not be written by the + * hypervisor. 
It is therefore up to the caller to put a suitable sentinel + * into all slots before invoking the function. + * Indexing is 1-biased (PC1/CC1 being at index 0). + */ + XEN_GUEST_HANDLE_64(uint64) pc; + XEN_GUEST_HANDLE_64(uint64) cc; }; struct xen_sysctl_get_pmstat { #define PMSTAT_CATEGORY_MASK 0xf0 #define PMSTAT_PX 0x10 #define PMSTAT_CX 0x20 #define PMSTAT_get_max_px (PMSTAT_PX | 0x1) #define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) #define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) #define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) #define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) #define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) uint32_t type; uint32_t cpuid; union { struct pm_px_stat getpx; struct pm_cx_stat getcx; /* other struct for tx, etc */ } u; }; typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); /* XEN_SYSCTL_cpu_hotplug */ struct xen_sysctl_cpu_hotplug { /* IN variables */ uint32_t cpu; /* Physical cpu. */ #define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 #define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 uint32_t op; /* hotplug opcode */ }; typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); /* * Get/set xen power management, include * 1. 
cpufreq governors and related parameters */ /* XEN_SYSCTL_pm_op */ struct xen_userspace { uint32_t scaling_setspeed; }; typedef struct xen_userspace xen_userspace_t; struct xen_ondemand { uint32_t sampling_rate_max; uint32_t sampling_rate_min; uint32_t sampling_rate; uint32_t up_threshold; }; typedef struct xen_ondemand xen_ondemand_t; /* * cpufreq para name of this structure named * same as sysfs file name of native linux */ #define CPUFREQ_NAME_LEN 16 struct xen_get_cpufreq_para { /* IN/OUT variable */ uint32_t cpu_num; uint32_t freq_num; uint32_t gov_num; /* for all governors */ /* OUT variable */ XEN_GUEST_HANDLE_64(uint32) affected_cpus; XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies; XEN_GUEST_HANDLE_64(char) scaling_available_governors; char scaling_driver[CPUFREQ_NAME_LEN]; uint32_t cpuinfo_cur_freq; uint32_t cpuinfo_max_freq; uint32_t cpuinfo_min_freq; uint32_t scaling_cur_freq; char scaling_governor[CPUFREQ_NAME_LEN]; uint32_t scaling_max_freq; uint32_t scaling_min_freq; /* for specific governor */ union { struct xen_userspace userspace; struct xen_ondemand ondemand; } u; int32_t turbo_enabled; }; struct xen_set_cpufreq_gov { char scaling_governor[CPUFREQ_NAME_LEN]; }; struct xen_set_cpufreq_para { #define SCALING_MAX_FREQ 1 #define SCALING_MIN_FREQ 2 #define SCALING_SETSPEED 3 #define SAMPLING_RATE 4 #define UP_THRESHOLD 5 uint32_t ctrl_type; uint32_t ctrl_value; }; struct xen_sysctl_pm_op { #define PM_PARA_CATEGORY_MASK 0xf0 #define CPUFREQ_PARA 0x10 /* cpufreq command type */ #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) /* set/reset scheduler power saving option */ #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 /* cpuidle max_cstate access command */ #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 /* set scheduler migration cost value */ #define 
XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 /* enable/disable turbo mode when in dbs governor */ #define XEN_SYSCTL_pm_op_enable_turbo 0x26 #define XEN_SYSCTL_pm_op_disable_turbo 0x27 uint32_t cmd; uint32_t cpuid; union { struct xen_get_cpufreq_para get_para; struct xen_set_cpufreq_gov set_gov; struct xen_set_cpufreq_para set_para; uint64_aligned_t get_avgfreq; uint32_t set_sched_opt_smt; uint32_t get_max_cstate; uint32_t set_max_cstate; uint32_t get_vcpu_migration_delay; uint32_t set_vcpu_migration_delay; } u; }; /* XEN_SYSCTL_page_offline_op */ struct xen_sysctl_page_offline_op { /* IN: range of page to be offlined */ #define sysctl_page_offline 1 #define sysctl_page_online 2 #define sysctl_query_page_offline 3 uint32_t cmd; uint32_t start; uint32_t end; /* OUT: result of page offline request */ /* * bit 0~15: result flags * bit 16~31: owner */ XEN_GUEST_HANDLE(uint32) status; }; #define PG_OFFLINE_STATUS_MASK (0xFFUL) /* The result is invalid, i.e. 
HV does not handle it */ #define PG_OFFLINE_INVALID (0x1UL << 0) #define PG_OFFLINE_OFFLINED (0x1UL << 1) #define PG_OFFLINE_PENDING (0x1UL << 2) #define PG_OFFLINE_FAILED (0x1UL << 3) #define PG_OFFLINE_AGAIN (0x1UL << 4) #define PG_ONLINE_FAILED PG_OFFLINE_FAILED #define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED #define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) #define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) #define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) #define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) #define PG_OFFLINE_MISC_MASK (0xFFUL << 4) /* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */ #define PG_OFFLINE_XENPAGE (0x1UL << 8) #define PG_OFFLINE_DOM0PAGE (0x1UL << 9) #define PG_OFFLINE_ANONYMOUS (0x1UL << 10) #define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) #define PG_OFFLINE_OWNED (0x1UL << 12) #define PG_OFFLINE_BROKEN (0x1UL << 13) #define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN #define PG_OFFLINE_OWNER_SHIFT 16 /* XEN_SYSCTL_lockprof_op */ /* Sub-operations: */ #define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ #define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ /* Record-type: */ #define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ #define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ #define LOCKPROF_TYPE_N 2 /* number of types */ struct xen_sysctl_lockprof_data { char name[40]; /* lock name (may include up to 2 %d specifiers) */ int32_t type; /* LOCKPROF_TYPE_??? */ int32_t idx; /* index (e.g. domain id) */ uint64_aligned_t lock_cnt; /* # of locking succeeded */ uint64_aligned_t block_cnt; /* # of wait for lock */ uint64_aligned_t lock_time; /* nsecs lock held */ uint64_aligned_t block_time; /* nsecs waited for lock */ }; typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); struct xen_sysctl_lockprof_op { /* IN variables. */ uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? 
*/ uint32_t max_elem; /* size of output buffer */ /* OUT variables (query only). */ uint32_t nr_elem; /* number of elements available */ uint64_aligned_t time; /* nsecs of profile measurement */ /* profile information (or NULL) */ XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; }; typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); -/* XEN_SYSCTL_topologyinfo */ -#define INVALID_TOPOLOGY_ID (~0U) -struct xen_sysctl_topologyinfo { - /* - * IN: maximum addressable entry in the caller-provided arrays. - * OUT: largest cpu identifier in the system. - * If OUT is greater than IN then the arrays are truncated! - * If OUT is leass than IN then the array tails are not written by sysctl. - */ - uint32_t max_cpu_index; +/* XEN_SYSCTL_cputopoinfo */ +#define XEN_INVALID_CORE_ID (~0U) +#define XEN_INVALID_SOCKET_ID (~0U) +#define XEN_INVALID_NODE_ID (~0U) - /* - * If not NULL, these arrays are filled with core/socket/node identifier - * for each cpu. - * If a cpu has no core/socket/node information (e.g., cpu not present) - * then the sentinel value ~0u is written to each array. - * The number of array elements written by the sysctl is: - * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 - */ - XEN_GUEST_HANDLE_64(uint32) cpu_to_core; - XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; - XEN_GUEST_HANDLE_64(uint32) cpu_to_node; +struct xen_sysctl_cputopo { + uint32_t core; + uint32_t socket; + uint32_t node; }; -typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); +typedef struct xen_sysctl_cputopo xen_sysctl_cputopo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopo_t); +/* + * IN: + * - a NULL 'cputopo' handle is a request for maximum 'num_cpus'.
+ * - otherwise it's the number of entries in 'cputopo' + * + * OUT: + * - If 'num_cpus' is less than the number Xen wants to write but the + * handle is not a NULL one, partial data gets returned and 'num_cpus' gets + * updated to reflect the intended number. + * - Otherwise, 'num_cpus' shall indicate the number of entries written, which + * may be less than the input value. + */ +struct xen_sysctl_cputopoinfo { + uint32_t num_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cputopo_t) cputopo; +}; +typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t); + /* XEN_SYSCTL_numainfo */ -#define INVALID_NUMAINFO_ID (~0U) +#define XEN_INVALID_MEM_SZ (~0U) +#define XEN_INVALID_NODE_DIST (~0U) + +struct xen_sysctl_meminfo { + uint64_t memsize; + uint64_t memfree; +}; +typedef struct xen_sysctl_meminfo xen_sysctl_meminfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_meminfo_t); + +/* + * IN: + * - Both 'meminfo' and 'distance' handles being null is a request + * for maximum value of 'num_nodes'. + * - Otherwise it's the number of entries in 'meminfo' and square root + * of number of entries in 'distance' (when corresponding handle is + * non-null) + * + * OUT: + * - If 'num_nodes' is less than the number Xen wants to write but either + * handle is not a NULL one, partial data gets returned and 'num_nodes' + * gets updated to reflect the intended number. + * - Otherwise, 'num_nodes' shall indicate the number of entries written, which + * may be less than the input value. + */ + struct xen_sysctl_numainfo { - /* - * IN: maximum addressable entry in the caller-provided arrays. - * OUT: largest node identifier in the system. - * If OUT is greater than IN then the arrays are truncated! - */ - uint32_t max_node_index; + uint32_t num_nodes; - /* NB. Entries are 0 if node is not present.
*/ - XEN_GUEST_HANDLE_64(uint64) node_to_memsize; - XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + XEN_GUEST_HANDLE_64(xen_sysctl_meminfo_t) meminfo; /* - * Array, of size (max_node_index+1)^2, listing memory access distances - * between nodes. If an entry has no node distance information (e.g., node - * not present) then the value ~0u is written. - * - * Note that the array rows must be indexed by multiplying by the minimum - * of the caller-provided max_node_index and the returned value of - * max_node_index. That is, if the largest node index in the system is - * smaller than the caller can handle, a smaller 2-d array is constructed - * within the space provided by the caller. When this occurs, trailing - * space provided by the caller is not modified. If the largest node index - * in the system is larger than the caller can handle, then a 2-d array of - * the maximum size handleable by the caller is constructed. + * Distance between nodes 'i' and 'j' is stored in index 'i*N + j', + * where N is the number of nodes that will be returned in 'num_nodes' + * (i.e. 
not 'num_nodes' provided by the caller) */ - XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; + XEN_GUEST_HANDLE_64(uint32) distance; }; typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); /* XEN_SYSCTL_cpupool_op */ #define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */ #define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */ #define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */ #define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */ #define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */ #define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */ #define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */ #define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF struct xen_sysctl_cpupool_op { uint32_t op; /* IN */ uint32_t cpupool_id; /* IN: CDIARM OUT: CI */ uint32_t sched_id; /* IN: C OUT: I */ uint32_t domid; /* IN: M */ uint32_t cpu; /* IN: AR */ uint32_t n_dom; /* OUT: I */ - struct xenctl_cpumap cpumap; /* OUT: IF */ + struct xenctl_bitmap cpumap; /* OUT: IF */ }; typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); #define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 /* * This structure is used to pass a new ARINC653 schedule from a * privileged domain (ie dom0) to Xen. */ struct xen_sysctl_arinc653_schedule { /* major_frame holds the time for the new schedule's major frame * in nanoseconds. */ uint64_aligned_t major_frame; /* num_sched_entries holds how many of the entries in the * sched_entries[] array are valid. */ uint8_t num_sched_entries; /* The sched_entries array holds the actual schedule entries. */ struct { /* dom_handle must match a domain's UUID */ xen_domain_handle_t dom_handle; /* If a domain has multiple VCPUs, vcpu_id specifies which one * this schedule entry applies to. It should be set to 0 if * there is only one VCPU for the domain. */ unsigned int vcpu_id; /* runtime specifies the amount of time that should be allocated * to this VCPU per major frame. 
It is specified in nanoseconds */ uint64_aligned_t runtime; } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; }; typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t); struct xen_sysctl_credit_schedule { /* Length of timeslice in milliseconds */ #define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000 #define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 unsigned tslice_ms; /* Rate limit (minimum timeslice) in microseconds */ #define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000 #define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100 unsigned ratelimit_us; }; typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t); /* XEN_SYSCTL_scheduler_op */ /* Set or get info? */ #define XEN_SYSCTL_SCHEDOP_putinfo 0 #define XEN_SYSCTL_SCHEDOP_getinfo 1 struct xen_sysctl_scheduler_op { uint32_t cpupool_id; /* Cpupool whose scheduler is to be targetted. */ uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */ uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */ union { struct xen_sysctl_sched_arinc653 { XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule; } sched_arinc653; struct xen_sysctl_credit_schedule sched_credit; } u; }; typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t); +/* XEN_SYSCTL_coverage_op */ +/* + * Get total size of information, to help allocate + * the buffer. The pointer points to a 32 bit value. + */ +#define XEN_SYSCTL_COVERAGE_get_total_size 0 + +/* + * Read coverage information in a single run + * You must use a tool to split them. + */ +#define XEN_SYSCTL_COVERAGE_read 1 + +/* + * Reset all the coverage counters to 0 + * No parameters. + */ +#define XEN_SYSCTL_COVERAGE_reset 2 + +/* + * Like XEN_SYSCTL_COVERAGE_read but reset also + * counters to 0 in a single call. 
+ */ +#define XEN_SYSCTL_COVERAGE_read_and_reset 3 + +struct xen_sysctl_coverage_op { + uint32_t cmd; /* XEN_SYSCTL_COVERAGE_* */ + union { + uint32_t total_size; /* OUT */ + XEN_GUEST_HANDLE_64(uint8) raw_info; /* OUT */ + } u; +}; +typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t); + +#define XEN_SYSCTL_PSR_CMT_get_total_rmid 0 +#define XEN_SYSCTL_PSR_CMT_get_l3_upscaling_factor 1 +/* The L3 cache size is returned in KB unit */ +#define XEN_SYSCTL_PSR_CMT_get_l3_cache_size 2 +#define XEN_SYSCTL_PSR_CMT_enabled 3 +#define XEN_SYSCTL_PSR_CMT_get_l3_event_mask 4 +struct xen_sysctl_psr_cmt_op { + uint32_t cmd; /* IN: XEN_SYSCTL_PSR_CMT_* */ + uint32_t flags; /* padding variable, may be extended for future use */ + union { + uint64_t data; /* OUT */ + struct { + uint32_t cpu; /* IN */ + uint32_t rsvd; + } l3_cache; + } u; +}; +typedef struct xen_sysctl_psr_cmt_op xen_sysctl_psr_cmt_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cmt_op_t); + +/* XEN_SYSCTL_pcitopoinfo */ +#define XEN_INVALID_DEV (XEN_INVALID_NODE_ID - 1) +struct xen_sysctl_pcitopoinfo { + /* + * IN: Number of elements in 'pcitopo' and 'nodes' arrays. + * OUT: Number of processed elements of those arrays. + */ + uint32_t num_devs; + + /* IN: list of devices for which node IDs are requested. */ + XEN_GUEST_HANDLE_64(physdev_pci_device_t) devs; + + /* + * OUT: node identifier for each device. + * If information for a particular device is not available then + * corresponding entry will be set to XEN_INVALID_NODE_ID. If + * device is not known to the hypervisor then XEN_INVALID_DEV + * will be provided. 
+ */ + XEN_GUEST_HANDLE_64(uint32) nodes; +}; +typedef struct xen_sysctl_pcitopoinfo xen_sysctl_pcitopoinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_pcitopoinfo_t); + +#define XEN_SYSCTL_PSR_CAT_get_l3_info 0 +struct xen_sysctl_psr_cat_op { + uint32_t cmd; /* IN: XEN_SYSCTL_PSR_CAT_* */ + uint32_t target; /* IN */ + union { + struct { + uint32_t cbm_len; /* OUT: CBM length */ + uint32_t cos_max; /* OUT: Maximum COS */ + } l3_info; + } u; +}; +typedef struct xen_sysctl_psr_cat_op xen_sysctl_psr_cat_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cat_op_t); + +#define XEN_SYSCTL_TMEM_OP_ALL_CLIENTS 0xFFFFU + +#define XEN_SYSCTL_TMEM_OP_THAW 0 +#define XEN_SYSCTL_TMEM_OP_FREEZE 1 +#define XEN_SYSCTL_TMEM_OP_FLUSH 2 +#define XEN_SYSCTL_TMEM_OP_DESTROY 3 +#define XEN_SYSCTL_TMEM_OP_LIST 4 +#define XEN_SYSCTL_TMEM_OP_SET_WEIGHT 5 +#define XEN_SYSCTL_TMEM_OP_SET_CAP 6 +#define XEN_SYSCTL_TMEM_OP_SET_COMPRESS 7 +#define XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB 8 +#define XEN_SYSCTL_TMEM_OP_SAVE_BEGIN 10 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_VERSION 11 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_MAXPOOLS 12 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_CLIENT_WEIGHT 13 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_CLIENT_CAP 14 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_CLIENT_FLAGS 15 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_POOL_FLAGS 16 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_POOL_NPAGES 17 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_POOL_UUID 18 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE 19 +#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV 20 +#define XEN_SYSCTL_TMEM_OP_SAVE_END 21 +#define XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN 30 +#define XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE 32 +#define XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE 33 + +/* + * XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_[PAGE|INV] override the 'buf' in + * xen_sysctl_tmem_op with this structure - sometimes with an extra + * page tackled on. 
+ */ +struct tmem_handle { + uint32_t pool_id; + uint32_t index; + xen_tmem_oid_t oid; +}; + +struct xen_sysctl_tmem_op { + uint32_t cmd; /* IN: XEN_SYSCTL_TMEM_OP_* . */ + int32_t pool_id; /* IN: 0 by default unless _SAVE_*, RESTORE_* .*/ + uint32_t cli_id; /* IN: client id, 0 for XEN_SYSCTL_TMEM_QUERY_FREEABLE_MB + for all others can be the domain id or + XEN_SYSCTL_TMEM_OP_ALL_CLIENTS for all. */ + uint32_t arg1; /* IN: If not applicable to command use 0. */ + uint32_t arg2; /* IN: If not applicable to command use 0. */ + uint32_t pad; /* Padding so structure is the same under 32 and 64. */ + xen_tmem_oid_t oid; /* IN: If not applicable to command use 0s. */ + XEN_GUEST_HANDLE_64(char) buf; /* IN/OUT: Buffer to save and restore ops. */ +}; +typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t); + struct xen_sysctl { uint32_t cmd; #define XEN_SYSCTL_readconsole 1 #define XEN_SYSCTL_tbuf_op 2 #define XEN_SYSCTL_physinfo 3 #define XEN_SYSCTL_sched_id 4 #define XEN_SYSCTL_perfc_op 5 #define XEN_SYSCTL_getdomaininfolist 6 #define XEN_SYSCTL_debug_keys 7 #define XEN_SYSCTL_getcpuinfo 8 #define XEN_SYSCTL_availheap 9 #define XEN_SYSCTL_get_pmstat 10 #define XEN_SYSCTL_cpu_hotplug 11 #define XEN_SYSCTL_pm_op 12 #define XEN_SYSCTL_page_offline_op 14 #define XEN_SYSCTL_lockprof_op 15 -#define XEN_SYSCTL_topologyinfo 16 +#define XEN_SYSCTL_cputopoinfo 16 #define XEN_SYSCTL_numainfo 17 #define XEN_SYSCTL_cpupool_op 18 #define XEN_SYSCTL_scheduler_op 19 +#define XEN_SYSCTL_coverage_op 20 +#define XEN_SYSCTL_psr_cmt_op 21 +#define XEN_SYSCTL_pcitopoinfo 22 +#define XEN_SYSCTL_psr_cat_op 23 +#define XEN_SYSCTL_tmem_op 24 uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ union { struct xen_sysctl_readconsole readconsole; struct xen_sysctl_tbuf_op tbuf_op; struct xen_sysctl_physinfo physinfo; - struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_cputopoinfo cputopoinfo; + struct xen_sysctl_pcitopoinfo 
pcitopoinfo; struct xen_sysctl_numainfo numainfo; struct xen_sysctl_sched_id sched_id; struct xen_sysctl_perfc_op perfc_op; struct xen_sysctl_getdomaininfolist getdomaininfolist; struct xen_sysctl_debug_keys debug_keys; struct xen_sysctl_getcpuinfo getcpuinfo; struct xen_sysctl_availheap availheap; struct xen_sysctl_get_pmstat get_pmstat; struct xen_sysctl_cpu_hotplug cpu_hotplug; struct xen_sysctl_pm_op pm_op; struct xen_sysctl_page_offline_op page_offline; struct xen_sysctl_lockprof_op lockprof_op; struct xen_sysctl_cpupool_op cpupool_op; struct xen_sysctl_scheduler_op scheduler_op; + struct xen_sysctl_coverage_op coverage_op; + struct xen_sysctl_psr_cmt_op psr_cmt_op; + struct xen_sysctl_psr_cat_op psr_cat_op; + struct xen_sysctl_tmem_op tmem_op; uint8_t pad[128]; } u; }; typedef struct xen_sysctl xen_sysctl_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); #endif /* __XEN_PUBLIC_SYSCTL_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/tmem.h =================================================================== --- projects/clang370-import/sys/xen/interface/tmem.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/tmem.h (revision 288926) @@ -1,148 +1,124 @@ /****************************************************************************** * tmem.h * * Guest OS interface to Xen Transcendent Memory. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* * Copyright (c) 2004, K A Fraser */ #ifndef __XEN_PUBLIC_TMEM_H__ #define __XEN_PUBLIC_TMEM_H__ #include "xen.h" /* version of ABI */ #define TMEM_SPEC_VERSION 1 /* Commands to HYPERVISOR_tmem_op() */ -#define TMEM_CONTROL 0 +#ifdef __XEN__ +#define TMEM_CONTROL 0 /* Now called XEN_SYSCTL_tmem_op */ +#else +#undef TMEM_CONTROL +#endif #define TMEM_NEW_POOL 1 #define TMEM_DESTROY_POOL 2 -#define TMEM_NEW_PAGE 3 #define TMEM_PUT_PAGE 4 #define TMEM_GET_PAGE 5 #define TMEM_FLUSH_PAGE 6 #define TMEM_FLUSH_OBJECT 7 +#if __XEN_INTERFACE_VERSION__ < 0x00040400 +#define TMEM_NEW_PAGE 3 #define TMEM_READ 8 #define TMEM_WRITE 9 #define TMEM_XCHG 10 +#endif /* Privileged commands to HYPERVISOR_tmem_op() */ -#define TMEM_AUTH 101 +#define TMEM_AUTH 101 #define TMEM_RESTORE_NEW 102 -/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */ -#define TMEMC_THAW 0 -#define TMEMC_FREEZE 1 -#define TMEMC_FLUSH 2 -#define TMEMC_DESTROY 3 -#define TMEMC_LIST 4 -#define TMEMC_SET_WEIGHT 5 -#define TMEMC_SET_CAP 6 -#define TMEMC_SET_COMPRESS 7 -#define TMEMC_QUERY_FREEABLE_MB 8 -#define TMEMC_SAVE_BEGIN 10 -#define TMEMC_SAVE_GET_VERSION 11 -#define TMEMC_SAVE_GET_MAXPOOLS 12 -#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13 -#define TMEMC_SAVE_GET_CLIENT_CAP 14 -#define TMEMC_SAVE_GET_CLIENT_FLAGS 15 -#define TMEMC_SAVE_GET_POOL_FLAGS 16 -#define TMEMC_SAVE_GET_POOL_NPAGES 17 -#define TMEMC_SAVE_GET_POOL_UUID 18 -#define TMEMC_SAVE_GET_NEXT_PAGE 19 -#define TMEMC_SAVE_GET_NEXT_INV 20 -#define TMEMC_SAVE_END 21 -#define TMEMC_RESTORE_BEGIN 30 -#define TMEMC_RESTORE_PUT_PAGE 32 -#define TMEMC_RESTORE_FLUSH_PAGE 33 - /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ #define TMEM_POOL_PERSIST 1 #define TMEM_POOL_SHARED 2 #define TMEM_POOL_PRECOMPRESSED 4 #define TMEM_POOL_PAGESIZE_SHIFT 4 #define TMEM_POOL_PAGESIZE_MASK 0xf #define TMEM_POOL_VERSION_SHIFT 24 #define TMEM_POOL_VERSION_MASK 0xff #define TMEM_POOL_RESERVED_BITS 0x00ffff00 /* Bits for client flags (save/restore) */ #define 
TMEM_CLIENT_COMPRESS 1 #define TMEM_CLIENT_FROZEN 2 /* Special errno values */ #define EFROZEN 1000 #define EEMPTY 1001 +struct xen_tmem_oid { + uint64_t oid[3]; +}; +typedef struct xen_tmem_oid xen_tmem_oid_t; +DEFINE_XEN_GUEST_HANDLE(xen_tmem_oid_t); #ifndef __ASSEMBLY__ +#if __XEN_INTERFACE_VERSION__ < 0x00040400 typedef xen_pfn_t tmem_cli_mfn_t; +#endif typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t; struct tmem_op { uint32_t cmd; int32_t pool_id; union { struct { uint64_t uuid[2]; uint32_t flags; uint32_t arg1; } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */ - struct { - uint32_t subop; - uint32_t cli_id; - uint32_t arg1; - uint32_t arg2; - uint64_t oid[3]; - tmem_cli_va_t buf; - } ctrl; /* for cmd == TMEM_CONTROL */ struct { - +#if __XEN_INTERFACE_VERSION__ < 0x00040600 uint64_t oid[3]; +#else + xen_tmem_oid_t oid; +#endif uint32_t index; uint32_t tmem_offset; uint32_t pfn_offset; uint32_t len; - tmem_cli_mfn_t cmfn; /* client machine page frame */ + xen_pfn_t cmfn; /* client machine page frame */ } gen; /* for all other cmd ("generic") */ } u; }; typedef struct tmem_op tmem_op_t; DEFINE_XEN_GUEST_HANDLE(tmem_op_t); - -struct tmem_handle { - uint32_t pool_id; - uint32_t index; - uint64_t oid[3]; -}; #endif #endif /* __XEN_PUBLIC_TMEM_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/trace.h =================================================================== --- projects/clang370-import/sys/xen/interface/trace.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/trace.h (revision 288926) @@ -1,245 +1,331 @@ /****************************************************************************** * include/public/trace.h * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in 
the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Mark Williamson, (C) 2004 Intel Research Cambridge * Copyright (C) 2005 Bin Ren */ #ifndef __XEN_PUBLIC_TRACE_H__ #define __XEN_PUBLIC_TRACE_H__ #define TRACE_EXTRA_MAX 7 #define TRACE_EXTRA_SHIFT 28 /* Trace classes */ #define TRC_CLS_SHIFT 16 #define TRC_GEN 0x0001f000 /* General trace */ #define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ #define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ #define TRC_HVM 0x0008f000 /* Xen HVM trace */ #define TRC_MEM 0x0010f000 /* Xen memory trace */ #define TRC_PV 0x0020f000 /* Xen PV traces */ #define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ #define TRC_HW 0x0080f000 /* Xen hardware-related traces */ #define TRC_GUEST 0x0800f000 /* Guest-generated traces */ #define TRC_ALL 0x0ffff000 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) #define TRC_HD_CYCLE_FLAG (1UL<<31) #define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) #define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) /* Trace subclasses */ #define TRC_SUBCLS_SHIFT 12 /* trace subclasses for SVM */ -#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY 
and #VMEXIT */ -#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ +#define TRC_HVM_EMUL 0x00084000 /* emulated devices */ #define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ #define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ #define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ +/* + * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are + * reserved for encoding what scheduler produced the information. The + * actual event is encoded in the last 9 bits. + * + * This means we have 8 scheduling IDs available (which means at most 8 + * schedulers generating events) and, in each scheduler, up to 512 + * different events. + */ +#define TRC_SCHED_ID_BITS 3 +#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS) +#define TRC_SCHED_ID_MASK (((1UL<cpu_offset[cpu]). */ struct t_info { uint16_t tbuf_size; /* Size in pages of each trace buffer */ uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ /* MFN lists immediately after the header */ }; #endif /* __XEN_PUBLIC_TRACE_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/vcpu.h =================================================================== --- projects/clang370-import/sys/xen/interface/vcpu.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/vcpu.h (revision 288926) @@ -1,240 +1,240 @@ /****************************************************************************** * vcpu.h * * VCPU initialisation, query, and hotplug. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_VCPU_H__ #define __XEN_PUBLIC_VCPU_H__ #include "xen.h" /* * Prototype for this hypercall is: - * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * long vcpu_op(int cmd, unsigned int vcpuid, void *extra_args) * @cmd == VCPUOP_??? (VCPU operation). * @vcpuid == VCPU to operate on. * @extra_args == Operation-specific extra arguments (NULL if none). */ /* * Initialise a VCPU. Each VCPU can be initialised only once. A * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. * * @extra_arg == pointer to vcpu_guest_context structure containing initial * state for the VCPU. */ #define VCPUOP_initialise 0 /* * Bring up a VCPU. This makes the VCPU runnable. This operation will fail * if the VCPU has not been initialised (VCPUOP_initialise). */ #define VCPUOP_up 1 /* * Bring down a VCPU (i.e., make it non-runnable). 
* There are a few caveats that callers should observe: * 1. This operation may return, and VCPU_is_up may return false, before the * VCPU stops running (i.e., the command is asynchronous). It is a good * idea to ensure that the VCPU has entered a non-critical loop before * bringing it down. Alternatively, this operation is guaranteed * synchronous if invoked by the VCPU itself. * 2. After a VCPU is initialised, there is currently no way to drop all its * references to domain memory. Even a VCPU that is down still holds * memory references via its pagetable base pointer and GDT. It is good * practise to move a VCPU onto an 'idle' or default page table, LDT and * GDT before bringing it down. */ #define VCPUOP_down 2 /* Returns 1 if the given VCPU is up. */ #define VCPUOP_is_up 3 /* * Return information about the state and running time of a VCPU. * @extra_arg == pointer to vcpu_runstate_info structure. */ #define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { /* VCPU's current state (RUNSTATE_*). */ int state; /* When was current state entered (system time, ns)? */ uint64_t state_entry_time; /* * Time spent in each RUNSTATE_* (ns). The sum of these times is * guaranteed not to drift from system time. */ uint64_t time[4]; }; typedef struct vcpu_runstate_info vcpu_runstate_info_t; DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); /* VCPU is currently running on a physical CPU. */ #define RUNSTATE_running 0 /* VCPU is runnable, but not currently scheduled on any physical CPU. */ #define RUNSTATE_runnable 1 /* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ #define RUNSTATE_blocked 2 /* * VCPU is not runnable, but it is not blocked. * This is a 'catch all' state for things like hotplug and pauses by the * system administrator (or for critical sections in the hypervisor). * RUNSTATE_blocked dominates this state (it is the preferred state). 
*/ #define RUNSTATE_offline 3 /* * Register a shared memory area from which the guest may obtain its own * runstate information without needing to execute a hypercall. * Notes: * 1. The registered address may be virtual or physical or guest handle, * depending on the platform. Virtual address or guest handle should be * registered on x86 systems. * 2. Only one shared area may be registered per VCPU. The shared area is * updated by the hypervisor each time the VCPU is scheduled. Thus * runstate.state will always be RUNSTATE_running and * runstate.state_entry_time will indicate the system time at which the * VCPU was last scheduled to run. * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. */ #define VCPUOP_register_runstate_memory_area 5 struct vcpu_register_runstate_memory_area { union { XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; struct vcpu_runstate_info *v; uint64_t p; } addr; }; typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); /* * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer * which can be set via these commands. Periods smaller than one millisecond * may not be supported. */ #define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ #define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ struct vcpu_set_periodic_timer { uint64_t period_ns; }; typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); /* * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot * timer which can be set via these commands. */ #define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ #define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ struct vcpu_set_singleshot_timer { uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ uint32_t flags; /* VCPU_SSHOTTMR_??? 
*/ }; typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); /* Flags to VCPUOP_set_singleshot_timer. */ /* Require the timeout to be in the future (return -ETIME if it's passed). */ #define _VCPU_SSHOTTMR_future (0) #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) /* * Register a memory location in the guest address space for the * vcpu_info structure. This allows the guest to place the vcpu_info * structure in a convenient place, such as in a per-cpu data area. * The pointer need not be page aligned, but the structure must not * cross a page boundary. * * This may be called only once per vcpu. */ #define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ struct vcpu_register_vcpu_info { uint64_t mfn; /* mfn of page to place vcpu_info */ uint32_t offset; /* offset within page */ uint32_t rsvd; /* unused */ }; typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ #define VCPUOP_send_nmi 11 /* * Get the physical ID information for a pinned vcpu's underlying physical * processor. The physical ID informmation is architecture-specific. * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. * This command returns -EINVAL if it is not a valid operation for this VCPU. */ #define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ struct vcpu_get_physid { uint64_t phys_id; }; typedef struct vcpu_get_physid vcpu_get_physid_t; DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); #define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) #define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) /* * Register a memory location to get a secondary copy of the vcpu time * parameters. 
The master copy still exists as part of the vcpu shared * memory area, and this secondary copy is updated whenever the master copy * is updated (and using the same versioning scheme for synchronisation). * * The intent is that this copy may be mapped (RO) into userspace so * that usermode can compute system time using the time info and the * tsc. Usermode will see an array of vcpu_time_info structures, one * for each vcpu, and choose the right one by an existing mechanism * which allows it to get the current vcpu number (such as via a * segment limit). It can then apply the normal algorithm to compute * system time from the tsc. * * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. */ #define VCPUOP_register_vcpu_time_memory_area 13 DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); struct vcpu_register_time_memory_area { union { XEN_GUEST_HANDLE(vcpu_time_info_t) h; struct vcpu_time_info *v; uint64_t p; } addr; }; typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); #endif /* __XEN_PUBLIC_VCPU_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/version.h =================================================================== --- projects/clang370-import/sys/xen/interface/version.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/version.h (revision 288926) @@ -1,94 +1,96 @@ /****************************************************************************** * version.h * * Xen version, type, and compile information. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Nguyen Anh Quynh * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_VERSION_H__ #define __XEN_PUBLIC_VERSION_H__ +#include "xen.h" + /* NB. All ops return zero on success, except XENVER_{version,pagesize} */ /* arg == NULL; returns major:minor (16:16). */ #define XENVER_version 0 /* arg == xen_extraversion_t. */ #define XENVER_extraversion 1 typedef char xen_extraversion_t[16]; #define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) /* arg == xen_compile_info_t. 
*/ #define XENVER_compile_info 2 struct xen_compile_info { char compiler[64]; char compile_by[16]; char compile_domain[32]; char compile_date[32]; }; typedef struct xen_compile_info xen_compile_info_t; #define XENVER_capabilities 3 typedef char xen_capabilities_info_t[1024]; #define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) #define XENVER_changeset 4 typedef char xen_changeset_info_t[64]; #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) #define XENVER_platform_parameters 5 struct xen_platform_parameters { - unsigned long virt_start; + xen_ulong_t virt_start; }; typedef struct xen_platform_parameters xen_platform_parameters_t; #define XENVER_get_features 6 struct xen_feature_info { unsigned int submap_idx; /* IN: which 32-bit submap to return */ uint32_t submap; /* OUT: 32-bit submap */ }; typedef struct xen_feature_info xen_feature_info_t; /* Declares the features reported by XENVER_get_features. */ #include "features.h" /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 #define XENVER_commandline 9 typedef char xen_commandline_t[1024]; #endif /* __XEN_PUBLIC_VERSION_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/vm_event.h =================================================================== --- projects/clang370-import/sys/xen/interface/vm_event.h (nonexistent) +++ projects/clang370-import/sys/xen/interface/vm_event.h (revision 288926) @@ -0,0 +1,269 @@ +/****************************************************************************** + * vm_event.h + * + * Memory event common structures. + * + * Copyright (c) 2009 by Citrix Systems, Inc. 
(Patrick Colp) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_VM_EVENT_H +#define _XEN_PUBLIC_VM_EVENT_H + +#include "xen.h" + +#define VM_EVENT_INTERFACE_VERSION 0x00000001 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#include "io/ring.h" + +/* + * Memory event flags + */ + +/* + * VCPU_PAUSED in a request signals that the vCPU triggering the event has been + * paused + * VCPU_PAUSED in a response signals to unpause the vCPU + */ +#define VM_EVENT_FLAG_VCPU_PAUSED (1 << 0) +/* Flags to aid debugging vm_event */ +#define VM_EVENT_FLAG_FOREIGN (1 << 1) +/* + * The following flags can be set in response to a mem_access event. + * + * Emulate the fault-causing instruction (if set in the event response flags). + * This will allow the guest to continue execution without lifting the page + * access restrictions. 
+ */ +#define VM_EVENT_FLAG_EMULATE (1 << 2) +/* + * Same as VM_EVENT_FLAG_EMULATE, but with write operations or operations + * potentially having side effects (like memory mapped or port I/O) disabled. + */ +#define VM_EVENT_FLAG_EMULATE_NOWRITE (1 << 3) +/* + * Toggle singlestepping on vm_event response. + * Requires the vCPU to be paused already (synchronous events only). + */ +#define VM_EVENT_FLAG_TOGGLE_SINGLESTEP (1 << 4) +/* + * Data is being sent back to the hypervisor in the event response, to be + * returned by the read function when emulating an instruction. + * This flag is only useful when combined with VM_EVENT_FLAG_EMULATE + * and takes precedence if combined with VM_EVENT_FLAG_EMULATE_NOWRITE + * (i.e. if both VM_EVENT_FLAG_EMULATE_NOWRITE and + * VM_EVENT_FLAG_SET_EMUL_READ_DATA are set, only the latter will be honored). + */ +#define VM_EVENT_FLAG_SET_EMUL_READ_DATA (1 << 5) + /* + * Deny completion of the operation that triggered the event. + * Currently only useful for MSR, CR0, CR3 and CR4 write events. + */ +#define VM_EVENT_FLAG_DENY (1 << 6) +/* + * This flag can be set in a request or a response + * + * On a request, indicates that the event occurred in the alternate p2m specified by + * the altp2m_idx request field. + * + * On a response, indicates that the VCPU should resume in the alternate p2m specified + * by the altp2m_idx response field if possible. + */ +#define VM_EVENT_FLAG_ALTERNATE_P2M (1 << 7) + +/* + * Reasons for the vm event request + */ + +/* Default case */ +#define VM_EVENT_REASON_UNKNOWN 0 +/* Memory access violation */ +#define VM_EVENT_REASON_MEM_ACCESS 1 +/* Memory sharing event */ +#define VM_EVENT_REASON_MEM_SHARING 2 +/* Memory paging event */ +#define VM_EVENT_REASON_MEM_PAGING 3 +/* A control register was updated */ +#define VM_EVENT_REASON_WRITE_CTRLREG 4 +/* An MSR was updated. */ +#define VM_EVENT_REASON_MOV_TO_MSR 5 +/* Debug operation executed (e.g. 
int3) */ +#define VM_EVENT_REASON_SOFTWARE_BREAKPOINT 6 +/* Single-step (e.g. MTF) */ +#define VM_EVENT_REASON_SINGLESTEP 7 +/* An event has been requested via HVMOP_guest_request_vm_event. */ +#define VM_EVENT_REASON_GUEST_REQUEST 8 + +/* Supported values for the vm_event_write_ctrlreg index. */ +#define VM_EVENT_X86_CR0 0 +#define VM_EVENT_X86_CR3 1 +#define VM_EVENT_X86_CR4 2 +#define VM_EVENT_X86_XCR0 3 + +/* + * Using a custom struct (not hvm_hw_cpu) so as to not fill + * the vm_event ring buffer too quickly. + */ +struct vm_event_regs_x86 { + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rflags; + uint64_t dr7; + uint64_t rip; + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t msr_efer; + uint64_t msr_star; + uint64_t msr_lstar; + uint64_t fs_base; + uint64_t gs_base; + uint32_t cs_arbytes; + uint32_t _pad; +}; + +/* + * mem_access flag definitions + * + * These flags are set only as part of a mem_event request. + * + * R/W/X: Defines the type of violation that has triggered the event + * Multiple types can be set in a single violation! 
+ * GLA_VALID: If the gla field holds a guest VA associated with the event + * FAULT_WITH_GLA: If the violation was triggered by accessing gla + * FAULT_IN_GPT: If the violation was triggered during translating gla + */ +#define MEM_ACCESS_R (1 << 0) +#define MEM_ACCESS_W (1 << 1) +#define MEM_ACCESS_X (1 << 2) +#define MEM_ACCESS_RWX (MEM_ACCESS_R | MEM_ACCESS_W | MEM_ACCESS_X) +#define MEM_ACCESS_RW (MEM_ACCESS_R | MEM_ACCESS_W) +#define MEM_ACCESS_RX (MEM_ACCESS_R | MEM_ACCESS_X) +#define MEM_ACCESS_WX (MEM_ACCESS_W | MEM_ACCESS_X) +#define MEM_ACCESS_GLA_VALID (1 << 3) +#define MEM_ACCESS_FAULT_WITH_GLA (1 << 4) +#define MEM_ACCESS_FAULT_IN_GPT (1 << 5) + +struct vm_event_mem_access { + uint64_t gfn; + uint64_t offset; + uint64_t gla; /* if flags has MEM_ACCESS_GLA_VALID set */ + uint32_t flags; /* MEM_ACCESS_* */ + uint32_t _pad; +}; + +struct vm_event_write_ctrlreg { + uint32_t index; + uint32_t _pad; + uint64_t new_value; + uint64_t old_value; +}; + +struct vm_event_debug { + uint64_t gfn; +}; + +struct vm_event_mov_to_msr { + uint64_t msr; + uint64_t value; +}; + +#define MEM_PAGING_DROP_PAGE (1 << 0) +#define MEM_PAGING_EVICT_FAIL (1 << 1) + +struct vm_event_paging { + uint64_t gfn; + uint32_t p2mt; + uint32_t flags; +}; + +struct vm_event_sharing { + uint64_t gfn; + uint32_t p2mt; + uint32_t _pad; +}; + +struct vm_event_emul_read_data { + uint32_t size; + /* The struct is used in a union with vm_event_regs_x86. 
*/ + uint8_t data[sizeof(struct vm_event_regs_x86) - sizeof(uint32_t)]; +}; + +typedef struct vm_event_st { + uint32_t version; /* VM_EVENT_INTERFACE_VERSION */ + uint32_t flags; /* VM_EVENT_FLAG_* */ + uint32_t reason; /* VM_EVENT_REASON_* */ + uint32_t vcpu_id; + uint16_t altp2m_idx; /* may be used during request and response */ + uint16_t _pad[3]; + + union { + struct vm_event_paging mem_paging; + struct vm_event_sharing mem_sharing; + struct vm_event_mem_access mem_access; + struct vm_event_write_ctrlreg write_ctrlreg; + struct vm_event_mov_to_msr mov_to_msr; + struct vm_event_debug software_breakpoint; + struct vm_event_debug singlestep; + } u; + + union { + union { + struct vm_event_regs_x86 x86; + } regs; + + struct vm_event_emul_read_data emul_read_data; + } data; +} vm_event_request_t, vm_event_response_t; + +DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ +#endif /* _XEN_PUBLIC_VM_EVENT_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: projects/clang370-import/sys/xen/interface/vm_event.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Index: projects/clang370-import/sys/xen/interface/xen-compat.h =================================================================== --- projects/clang370-import/sys/xen/interface/xen-compat.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/xen-compat.h (revision 288926) @@ -1,44 +1,47 @@ /****************************************************************************** * xen-compat.h * * Guest OS interface to Xen. Compatibility layer. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2006, Christian Limpach */ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ #define __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040200 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040600 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. */ #define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ #elif !defined(__XEN_INTERFACE_VERSION__) -/* Guests which do not specify a version get the legacy interface. */ -#define __XEN_INTERFACE_VERSION__ 0x00000000 +/* + * The interface version is not set if and only if xen/xen-os.h is not + * included. + */ +#error "Please include xen/xen-os.h" #endif #if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ #error "These header files do not support the requested interface version." 
#endif #endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ Index: projects/clang370-import/sys/xen/interface/xen.h =================================================================== --- projects/clang370-import/sys/xen/interface/xen.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/xen.h (revision 288926) @@ -1,844 +1,929 @@ /****************************************************************************** * xen.h * * Guest OS interface to Xen. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2004, K A Fraser */ #ifndef __XEN_PUBLIC_XEN_H__ #define __XEN_PUBLIC_XEN_H__ #include "xen-compat.h" #if defined(__i386__) || defined(__x86_64__) #include "arch-x86/xen.h" -#elif defined(__ia64__) -#include "arch-ia64.h" -#elif defined(__arm__) +#elif defined(__arm__) || defined (__aarch64__) #include "arch-arm.h" #else #error "Unsupported architecture" #endif #ifndef __ASSEMBLY__ /* Guest handles for primitive C types. 
*/ DEFINE_XEN_GUEST_HANDLE(char); __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); DEFINE_XEN_GUEST_HANDLE(int); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +#if __XEN_INTERFACE_VERSION__ < 0x00040300 DEFINE_XEN_GUEST_HANDLE(long); __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +#endif DEFINE_XEN_GUEST_HANDLE(void); DEFINE_XEN_GUEST_HANDLE(uint64_t); DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); #endif /* * HYPERCALLS */ /* `incontents 100 hcalls List of hypercalls * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() */ #define __HYPERVISOR_set_trap_table 0 #define __HYPERVISOR_mmu_update 1 #define __HYPERVISOR_set_gdt 2 #define __HYPERVISOR_stack_switch 3 #define __HYPERVISOR_set_callbacks 4 #define __HYPERVISOR_fpu_taskswitch 5 #define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ #define __HYPERVISOR_platform_op 7 #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 #define __HYPERVISOR_update_descriptor 10 #define __HYPERVISOR_memory_op 12 #define __HYPERVISOR_multicall 13 #define __HYPERVISOR_update_va_mapping 14 #define __HYPERVISOR_set_timer_op 15 #define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ #define __HYPERVISOR_xen_version 17 #define __HYPERVISOR_console_io 18 #define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ #define __HYPERVISOR_grant_table_op 20 #define __HYPERVISOR_vm_assist 21 #define __HYPERVISOR_update_va_mapping_otherdomain 22 #define __HYPERVISOR_iret 23 /* x86 only */ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_xsm_op 27 #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op 29 #define __HYPERVISOR_callback_op 30 #define __HYPERVISOR_xenoprof_op 31 #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 #define __HYPERVISOR_sysctl 35 #define __HYPERVISOR_domctl 36 
#define __HYPERVISOR_kexec_op 37 #define __HYPERVISOR_tmem_op 38 #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ +#define __HYPERVISOR_xenpmu_op 40 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 #define __HYPERVISOR_arch_1 49 #define __HYPERVISOR_arch_2 50 #define __HYPERVISOR_arch_3 51 #define __HYPERVISOR_arch_4 52 #define __HYPERVISOR_arch_5 53 #define __HYPERVISOR_arch_6 54 #define __HYPERVISOR_arch_7 55 /* ` } */ /* * HYPERCALL COMPATIBILITY. */ /* New sched_op hypercall introduced in 0x00030101. */ #if __XEN_INTERFACE_VERSION__ < 0x00030101 #undef __HYPERVISOR_sched_op #define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat #endif /* New event-channel and physdev hypercalls introduced in 0x00030202. */ #if __XEN_INTERFACE_VERSION__ < 0x00030202 #undef __HYPERVISOR_event_channel_op #define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat #undef __HYPERVISOR_physdev_op #define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat #endif /* New platform_op hypercall introduced in 0x00030204. */ #if __XEN_INTERFACE_VERSION__ < 0x00030204 #define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op #endif /* * VIRTUAL INTERRUPTS * * Virtual interrupts that a guest OS may receive from Xen. * * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. * The latter can be allocated only once per guest: they must initially be * allocated to VCPU0 but can subsequently be re-bound. */ /* ` enum virq { */ #define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ #define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ #define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ #define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ #define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ #define VIRQ_DEBUGGER 6 /* G. 
(DOM0) A domain has paused for debugging. */ #define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ #define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ #define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ #define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ #define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ +#define VIRQ_XENPMU 13 /* V. PMC interrupt */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 #define VIRQ_ARCH_1 17 #define VIRQ_ARCH_2 18 #define VIRQ_ARCH_3 19 #define VIRQ_ARCH_4 20 #define VIRQ_ARCH_5 21 #define VIRQ_ARCH_6 22 #define VIRQ_ARCH_7 23 /* ` } */ #define NR_VIRQS 24 /* * ` enum neg_errnoval * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], * ` unsigned count, unsigned *done_out, * ` unsigned foreigndom) * ` * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). * @count is the length of the above array. * @pdone is an output parameter indicating number of completed operations * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this * hypercall invocation. Can be DOMID_SELF. * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced * in this hypercall invocation. The value of this field * (x) encodes the PFD as follows: * x == 0 => PFD == DOMID_SELF * x != 0 => PFD == x - 1 * * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. * ------------- * ptr[1:0] == MMU_NORMAL_PT_UPDATE: * Updates an entry in a page table belonging to PFD. If updating an L1 table, * and the new table entry is valid/present, the mapped frame must belong to * FD. If attempting to map an I/O page then the caller assumes the privilege * of the FD. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen's heap space. * ptr[:2] -- Machine address of the page-table entry to modify. 
* val -- Value to write. * * There also certain implicit requirements when using this hypercall. The * pages that make up a pagetable must be mapped read-only in the guest. * This prevents uncontrolled guest updates to the pagetable. Xen strictly * enforces this, and will disallow any pagetable update which will end up * mapping pagetable page RW, and will disallow using any writable page as a * pagetable. In practice it means that when constructing a page table for a * process, thread, etc, we MUST be very dilligient in following these rules: * 1). Start with top-level page (PGD or in Xen language: L4). Fill out * the entries. * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD * or L2). * 3). Start filling out the PTE table (L1) with the PTE entries. Once * done, make sure to set each of those entries to RO (so writeable bit * is unset). Once that has been completed, set the PMD (L2) for this * PTE table as RO. * 4). When completed with all of the PMD (L2) entries, and all of them have * been set to RO, make sure to set RO the PUD (L3). Do the same * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. * 5). Now before you can use those pages (so setting the cr3), you MUST also * pin them so that the hypervisor can verify the entries. This is done * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op( * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be * issued. * For 32-bit guests, the L4 is not used (as there is less pagetables), so * instead use L3. * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE * hypercall. Also if so desired the OS can also try to write to the PTE * and be trapped by the hypervisor (as the PTE entry is RO). * * To deallocate the pages, the operations are the reverse of the steps * mentioned above. 
The argument is MMUEXT_UNPIN_TABLE for all levels and the * pagetable MUST not be in use (meaning that the cr3 is not set to it). * * ptr[1:0] == MMU_MACHPHYS_UPDATE: * Updates an entry in the machine->pseudo-physical mapping table. * ptr[:2] -- Machine address within the frame whose mapping to modify. * The frame must belong to the FD, if one is specified. * val -- Value to write into the mapping entry. * * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * with those in @val. * * @val is usually the machine frame number along with some attributes. * The attributes by default follow the architecture defined bits. Meaning that * if this is a X86_64 machine and four page table layout is used, the layout * of val is: * - 63 if set means No execute (NX) * - 46-13 the machine frame number * - 12 available for guest * - 11 available for guest * - 10 available for guest * - 9 available for guest * - 8 global * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) * - 6 dirty * - 5 accessed * - 4 page cached disabled * - 3 page write through * - 2 userspace accessible * - 1 writeable * - 0 present * * The one bits that does not fit with the default layout is the PAGE_PSE * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB * (or 2MB) instead of using the PAGE_PSE bit. * * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen * using it as the Page Attribute Table (PAT) bit - for details on it please * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of * pages instead of using MTRRs. 
* - * The PAT MSR is as follow (it is a 64-bit value, each entry is 8 bits): - * PAT4 PAT0 - * +---+----+----+----+-----+----+----+ - * WC | WC | WB | UC | UC- | WC | WB | <= Linux - * +---+----+----+----+-----+----+----+ - * WC | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) - * +---+----+----+----+-----+----+----+ - * WC | WP | WC | UC | UC- | WT | WB | <= Xen - * +---+----+----+----+-----+----+----+ + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +-----+-----+----+----+----+-----+----+----+ + * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen + * +-----+-----+----+----+----+-----+----+----+ * * The lookup of this index table translates to looking up * Bit 7, Bit 4, and Bit 3 of val entry: * * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). * * If all bits are off, then we are using PAT0. If bit 3 turned on, * then we are using PAT1, if bit 3 and bit 4, then PAT2.. * * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means * that if a guest that follows Linux's PAT setup and would like to set Write * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the * caching as: * * WB = none (so PAT0) * WC = PWT (bit 3 on) * UC = PWT | PCD (bit 3 and 4 are on). * * To make it work with Xen, it needs to translate the WC bit as so: * * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 * * And to translate back it would: * * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. */ #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. 
*/ #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ #define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ /* * MMU EXTENDED OPERATIONS - * - * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * + * ` enum neg_errnoval + * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[], + * ` unsigned int count, + * ` unsigned int *pdone, + * ` unsigned int foreigndom) + */ +/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. * A foreigndom (FD) can be specified (or DOMID_SELF for none). * Where the FD has some effect, it is described below. - * + * * cmd: MMUEXT_(UN)PIN_*_TABLE * mfn: Machine frame number to be (un)pinned as a p.t. page. * The frame must belong to the FD, if one is specified. - * + * * cmd: MMUEXT_NEW_BASEPTR * mfn: Machine frame number of new page-table base to install in MMU. - * + * * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] * mfn: Machine frame number of new page-table base to install in MMU * when in user space. - * + * * cmd: MMUEXT_TLB_FLUSH_LOCAL * No additional arguments. Flushes local TLB. - * + * * cmd: MMUEXT_INVLPG_LOCAL * linear_addr: Linear address to be flushed from the local TLB. - * + * * cmd: MMUEXT_TLB_FLUSH_MULTI * vcpumask: Pointer to bitmap of VCPUs to be flushed. - * + * * cmd: MMUEXT_INVLPG_MULTI * linear_addr: Linear address to be flushed. * vcpumask: Pointer to bitmap of VCPUs to be flushed. - * + * * cmd: MMUEXT_TLB_FLUSH_ALL * No additional arguments. Flushes all VCPUs' TLBs. - * + * * cmd: MMUEXT_INVLPG_ALL * linear_addr: Linear address to be flushed from all VCPUs' TLBs. - * + * * cmd: MMUEXT_FLUSH_CACHE * No additional arguments. Writes back and flushes cache contents. * * cmd: MMUEXT_FLUSH_CACHE_GLOBAL * No additional arguments. Writes back and flushes cache contents * on all CPUs in the system. - * + * * cmd: MMUEXT_SET_LDT * linear_addr: Linear address of LDT base (NB. must be page-aligned). * nr_ents: Number of entries in LDT. 
* * cmd: MMUEXT_CLEAR_PAGE * mfn: Machine frame number to be cleared. * * cmd: MMUEXT_COPY_PAGE * mfn: Machine frame number of the destination page. * src_mfn: Machine frame number of the source page. * * cmd: MMUEXT_[UN]MARK_SUPER * mfn: Machine frame number of head of superpage to be [un]marked. */ +/* ` enum mmuext_cmd { */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 #define MMUEXT_PIN_L3_TABLE 2 #define MMUEXT_PIN_L4_TABLE 3 #define MMUEXT_UNPIN_TABLE 4 #define MMUEXT_NEW_BASEPTR 5 #define MMUEXT_TLB_FLUSH_LOCAL 6 #define MMUEXT_INVLPG_LOCAL 7 #define MMUEXT_TLB_FLUSH_MULTI 8 #define MMUEXT_INVLPG_MULTI 9 #define MMUEXT_TLB_FLUSH_ALL 10 #define MMUEXT_INVLPG_ALL 11 #define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_SET_LDT 13 #define MMUEXT_NEW_USER_BASEPTR 15 #define MMUEXT_CLEAR_PAGE 16 #define MMUEXT_COPY_PAGE 17 #define MMUEXT_FLUSH_CACHE_GLOBAL 18 #define MMUEXT_MARK_SUPER 19 #define MMUEXT_UNMARK_SUPER 20 +/* ` } */ #ifndef __ASSEMBLY__ struct mmuext_op { - unsigned int cmd; + unsigned int cmd; /* => enum mmuext_cmd */ union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; } arg1; union { /* SET_LDT */ unsigned int nr_ents; /* TLB_FLUSH_MULTI, INVLPG_MULTI */ #if __XEN_INTERFACE_VERSION__ >= 0x00030205 XEN_GUEST_HANDLE(const_void) vcpumask; #else const void *vcpumask; #endif /* COPY_PAGE */ xen_pfn_t src_mfn; } arg2; }; typedef struct mmuext_op mmuext_op_t; DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #endif +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val, + * ` enum uvm_flags flags) + * ` + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val, + * ` enum uvm_flags flags, + * ` domid_t domid) + * ` + * ` @va: The virtual address whose mapping we want to change + * ` @val: The new page table entry, must contain a machine address + * ` @flags: 
Control TLB flushes + */ /* These are passed as 'flags' to update_va_mapping. They can be ORed. */ /* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ /* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +/* ` enum uvm_flags { */ #define UVMF_NONE (0UL<<0) /* No flushing at all. */ #define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ #define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ #define UVMF_FLUSHTYPE_MASK (3UL<<0) #define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ #define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ #define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +/* ` } */ /* * Commands to HYPERVISOR_console_io(). */ #define CONSOLEIO_write 0 #define CONSOLEIO_read 1 /* * Commands to HYPERVISOR_vm_assist(). */ #define VMASST_CMD_enable 0 #define VMASST_CMD_disable 1 /* x86/32 guests: simulate full 4GB segment limits. */ #define VMASST_TYPE_4gb_segments 0 /* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ #define VMASST_TYPE_4gb_segments_notify 1 /* * x86 guests: support writes to bottom-level PTEs. * NB1. Page-directory entries cannot be written. * NB2. Guest must continue to remove all writable mappings of PTEs. */ #define VMASST_TYPE_writable_pagetables 2 /* x86/PAE guests: support PDPTs above 4GB. */ #define VMASST_TYPE_pae_extended_cr3 3 +/* + * x86/64 guests: strictly hide M2P from user mode. + * This allows the guest to control respective hypervisor behavior: + * - when not set, L4 tables get created with the respective slot blank, + * and whenever the L4 table gets used as a kernel one the missing + * mapping gets inserted, + * - when set, L4 tables get created with the respective slot initialized + * as before, and whenever the L4 table gets used as a user one the + * mapping gets zapped. 
+ */ +#define VMASST_TYPE_m2p_strict 32 + +#if __XEN_INTERFACE_VERSION__ < 0x00040600 #define MAX_VMASST_TYPE 3 +#endif #ifndef __ASSEMBLY__ typedef uint16_t domid_t; /* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ #define DOMID_FIRST_RESERVED (0x7FF0U) /* DOMID_SELF is used in certain contexts to refer to oneself. */ #define DOMID_SELF (0x7FF0U) /* * DOMID_IO is used to restrict page-table updates to mapping I/O memory. * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO * is useful to ensure that no mappings to the OS's own heap are accidentally * installed. (e.g., in Linux this could cause havoc as reference counts * aren't adjusted on the I/O-mapping code path). * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can * be specified by any calling domain. */ #define DOMID_IO (0x7FF1U) /* * DOMID_XEN is used to allow privileged domains to map restricted parts of * Xen's heap space (e.g., the machine_to_phys table). * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if * the caller is privileged. */ #define DOMID_XEN (0x7FF2U) /* * DOMID_COW is used as the owner of sharable pages */ #define DOMID_COW (0x7FF3U) /* DOMID_INVALID is used to identify pages with unknown owner. */ #define DOMID_INVALID (0x7FF4U) /* Idle domain. */ #define DOMID_IDLE (0x7FFFU) /* * Send an array of these to HYPERVISOR_mmu_update(). * NB. The fields are natural pointer/address size for this architecture. */ struct mmu_update { uint64_t ptr; /* Machine address of PTE. */ uint64_t val; /* New contents of PTE. */ }; typedef struct mmu_update mmu_update_t; DEFINE_XEN_GUEST_HANDLE(mmu_update_t); /* - * Send an array of these to HYPERVISOR_multicall(). - * NB. The fields are natural register size for this architecture. + * ` enum neg_errnoval + * ` HYPERVISOR_multicall(multicall_entry_t call_list[], + * ` uint32_t nr_calls); + * + * NB. 
The fields are logically the natural register size for this + * architecture. In cases where xen_ulong_t is larger than this then + * any unused bits in the upper portion must be zero. */ struct multicall_entry { - unsigned long op, result; - unsigned long args[6]; + xen_ulong_t op, result; + xen_ulong_t args[6]; }; typedef struct multicall_entry multicall_entry_t; DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); +#if __XEN_INTERFACE_VERSION__ < 0x00040400 /* - * Event channel endpoints per domain: + * Event channel endpoints per domain (when using the 2-level ABI): * 1024 if a long is 32 bits; 4096 if a long is 64 bits. */ -#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) +#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS +#endif struct vcpu_time_info { /* * Updates to the following values are preceded and followed by an * increment of 'version'. The guest can therefore detect updates by * looking for changes to 'version'. If the least-significant bit of * the version number is set then an update is in progress and the guest * must wait to read a consistent set of values. * The correct way to interact with the version number is similar to * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. */ uint32_t version; uint32_t pad0; uint64_t tsc_timestamp; /* TSC at last update of time vals. */ uint64_t system_time; /* Time, in nanosecs, since boot. */ /* * Current system time: * system_time + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) * CPU frequency (Hz): * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift */ uint32_t tsc_to_system_mul; int8_t tsc_shift; int8_t pad1[3]; }; /* 32 bytes */ typedef struct vcpu_time_info vcpu_time_info_t; struct vcpu_info { /* * 'evtchn_upcall_pending' is written non-zero by Xen to indicate * a pending notification for a particular VCPU. It is then cleared * by the guest OS /before/ checking for pending work, thus avoiding * a set-and-check race. 
Note that the mask is only accessed by Xen * on the CPU that is currently hosting the VCPU. This means that the * pending and mask flags can be updated by the guest without special * synchronisation (i.e., no need for the x86 LOCK prefix). * This may seem suboptimal because if the pending flag is set by * a different CPU then an IPI may be scheduled even when the mask * is set. However, note: * 1. The task of 'interrupt holdoff' is covered by the per-event- * channel mask bits. A 'noisy' event that is continually being * triggered can be masked at source at this very precise * granularity. * 2. The main purpose of the per-VCPU mask is therefore to restrict * reentrant execution: whether for concurrency control, or to * prevent unbounded stack usage. Whatever the purpose, we expect * that the mask will be asserted only for short periods at a time, * and so the likelihood of a 'spurious' IPI is suitably small. * The mask is read before making an event upcall to the guest: a * non-zero mask therefore guarantees that the VCPU will not receive * an upcall activation. The mask is cleared when the VCPU requests * to block: this avoids wakeup-waiting races. */ uint8_t evtchn_upcall_pending; +#ifdef XEN_HAVE_PV_UPCALL_MASK uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; +#else /* XEN_HAVE_PV_UPCALL_MASK */ + uint8_t pad0; +#endif /* XEN_HAVE_PV_UPCALL_MASK */ + xen_ulong_t evtchn_pending_sel; struct arch_vcpu_info arch; struct vcpu_time_info time; }; /* 64 bytes (x86) */ #ifndef __XEN__ typedef struct vcpu_info vcpu_info_t; #endif /* + * `incontents 200 startofday_shared Start-of-day shared data structure * Xen/kernel shared data -- pointer provided in start_info. * * This structure is defined to be both smaller than a page, and the * only data on the shared page, but may vary in actual size even within * compatible Xen versions; guests should not rely on the size * of this structure remaining constant. 
*/ struct shared_info { struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; /* * A domain can create "event channels" on which it can send and receive * asynchronous event notifications. There are three classes of event that * are delivered by this mechanism: * 1. Bi-directional inter- and intra-domain connections. Domains must * arrange out-of-band to set up a connection (usually by allocating * an unbound 'listener' port and avertising that via a storage service * such as xenstore). * 2. Physical interrupts. A domain with suitable hardware-access * privileges can bind an event-channel port to a physical interrupt * source. * 3. Virtual interrupts ('events'). A domain can bind an event-channel * port to a virtual interrupt source, such as the virtual-timer * device or the emergency console. * * Event channels are addressed by a "port index". Each channel is * associated with two bits of information: * 1. PENDING -- notifies the domain that there is a pending notification * to be processed. This bit is cleared by the guest. * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING * will cause an asynchronous upcall to be scheduled. This bit is only * updated by the guest. It is read-only within Xen. If a channel * becomes pending while the channel is masked then the 'edge' is lost * (i.e., when the channel is unmasked, the guest must manually handle * pending notifications as no upcall will be scheduled by Xen). * * To expedite scanning of pending notifications, any 0->1 pending * transition on an unmasked channel causes a corresponding bit in a * per-vcpu selector word to be set. Each bit in the selector covers a * 'C long' in the PENDING bitfield array. */ - unsigned long evtchn_pending[sizeof(unsigned long) * 8]; - unsigned long evtchn_mask[sizeof(unsigned long) * 8]; + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; /* * Wallclock time: updated only by control software. 
Guests should base * their gettimeofday() syscall on this wallclock-base value. */ uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ +#if !defined(__i386__) + uint32_t wc_sec_hi; +# define xen_wc_sec_hi wc_sec_hi +#elif !defined(__XEN__) && !defined(__XEN_TOOLS__) +# define xen_wc_sec_hi arch.wc_sec_hi +#endif struct arch_shared_info arch; }; #ifndef __XEN__ typedef struct shared_info shared_info_t; #endif /* - * Start-of-day memory layout: + * `incontents 200 startofday Start-of-day memory layout + * * 1. The domain is started within contiguous virtual-memory region. * 2. The contiguous region ends on an aligned 4MB boundary. * 3. This the order of bootstrap elements in the initial virtual region: * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] + * (may be omitted) * c. list of allocated page frames [mfn_list, nr_pages] * (unless relocated due to XEN_ELFNOTE_INIT_P2M) * d. start_info_t structure [register ESI (x86)] - * e. bootstrap page tables [pt_base, CR3 (x86)] - * f. bootstrap stack [register ESP (x86)] + * in case of dom0 this page contains the console info, too + * e. unless dom0: xenstore ring page + * f. unless dom0: console ring page + * g. bootstrap page tables [pt_base and CR3 (x86)] + * h. bootstrap stack [register ESP (x86)] * 4. Bootstrap elements are packed together, but each is 4kB-aligned. - * 5. The initial ram disk may be omitted. - * 6. The list of page frames forms a contiguous 'pseudo-physical' memory + * 5. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain. In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * 6. All bootstrap elements are mapped read-writable for the guest OS. 
The * only exception is the bootstrap page table, which is mapped read-only. - * 8. There is guaranteed to be at least 512kB padding after the final + * 7. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. + * + * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page + * table layout") a bug caused the pt_base (3.g above) and cr3 to not point + * to the start of the guest page tables (it was offset by two pages). + * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU + * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got + * allocated in the order: 'first L1','first L2', 'first L3', so the offset + * to the page table base is by two pages back. The initial domain if it is + * 32-bit and runs under a 64-bit hypervisor should _NOT_ use two of the + * pages preceding pt_base and mark them as reserved/unused. */ - -#define MAX_GUEST_CMDLINE 1024 +#ifdef XEN_HAVE_PV_GUEST_ENTRY struct start_info { /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ char magic[32]; /* "xen-<version>-<platform>". */ unsigned long nr_pages; /* Total pages allocated to this domain. */ unsigned long shared_info; /* MACHINE address of shared info struct. */ uint32_t flags; /* SIF_xxx flags. */ xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ uint32_t store_evtchn; /* Event channel for store communication. */ union { struct { xen_pfn_t mfn; /* MACHINE page number of console page. */ uint32_t evtchn; /* Event channel for console page. */ } domU; struct { uint32_t info_off; /* Offset of console_info struct. */ uint32_t info_size; /* Size of console_info struct from start.*/ } dom0; } console; /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ unsigned long pt_base; /* VIRTUAL address of page directory. */ unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. 
*/ unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ /* (PFN of pre-loaded module if */ /* SIF_MOD_START_PFN set in flags). */ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ +#define MAX_GUEST_CMDLINE 1024 int8_t cmd_line[MAX_GUEST_CMDLINE]; /* The pfn range here covers both page table and p->m table frames. */ unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ }; typedef struct start_info start_info_t; /* New console union for dom0 introduced in 0x00030203. */ #if __XEN_INTERFACE_VERSION__ < 0x00030203 #define console_mfn console.domU.mfn #define console_evtchn console.domU.evtchn #endif +#endif /* XEN_HAVE_PV_GUEST_ENTRY */ /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ #define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ #define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */ + /* P->M making the 3 level tree obsolete? */ #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ /* * A multiboot module is a package containing modules very similar to a * multiboot module array. The only differences are: * - the array of module descriptors is by convention simply at the beginning * of the multiboot module, * - addresses in the module descriptors are based on the beginning of the * multiboot module, * - the number of modules is determined by a termination descriptor that has * mod_start == 0. * * This permits to both build it statically and reference it in a configuration * file, and let the PV guest easily rebase the addresses to virtual addresses * and at the same time count the number of modules. 
*/ struct xen_multiboot_mod_list { /* Address of first byte of the module */ uint32_t mod_start; /* Address of last byte of the module (inclusive) */ uint32_t mod_end; /* Address of zero-terminated command line */ uint32_t cmdline; /* Unused, must be zero */ uint32_t pad; }; - +/* + * `incontents 200 startofday_dom0_console Dom0_console + * + * The console structure in start_info.console.dom0 + * + * This structure includes a variety of information required to + * have a working VGA/VESA console. + */ typedef struct dom0_vga_console_info { uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ #define XEN_VGATYPE_TEXT_MODE_3 0x03 #define XEN_VGATYPE_VESA_LFB 0x23 #define XEN_VGATYPE_EFI_LFB 0x70 union { struct { /* Font height, in pixels. */ uint16_t font_height; /* Cursor location (column, row). */ uint16_t cursor_x, cursor_y; /* Number of rows and columns (dimensions in characters). */ uint16_t rows, columns; } text_mode_3; struct { /* Width and height, in pixels. */ uint16_t width, height; /* Bytes per scan line. */ uint16_t bytes_per_line; /* Bits per pixel. */ uint16_t bits_per_pixel; /* LFB physical address, and size (in units of 64kB). */ uint32_t lfb_base; uint32_t lfb_size; /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ uint8_t red_pos, red_size; uint8_t green_pos, green_size; uint8_t blue_pos, blue_size; uint8_t rsvd_pos, rsvd_size; #if __XEN_INTERFACE_VERSION__ >= 0x00030206 /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ uint32_t gbl_caps; /* Mode attributes (offset 0x0, VESA command 0x4f01). */ uint16_t mode_attrs; #endif } vesa_lfb; } u; } dom0_vga_console_info_t; #define xen_vga_console_info dom0_vga_console_info #define xen_vga_console_info_t dom0_vga_console_info_t typedef uint8_t xen_domain_handle_t[16]; /* Turn a plain number into a C unsigned long constant. 
*/ #define __mk_unsigned_long(x) x ## UL #define mk_unsigned_long(x) __mk_unsigned_long(x) __DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); __DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); __DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); __DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); #else /* __ASSEMBLY__ */ /* In assembly code we cannot use C numeric constant suffixes. */ #define mk_unsigned_long(x) x #endif /* !__ASSEMBLY__ */ /* Default definitions for macros used by domctl/sysctl. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) +#ifndef int64_aligned_t +#define int64_aligned_t int64_t +#endif #ifndef uint64_aligned_t #define uint64_aligned_t uint64_t #endif #ifndef XEN_GUEST_HANDLE_64 #define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) #endif #ifndef __ASSEMBLY__ -struct xenctl_cpumap { +struct xenctl_bitmap { XEN_GUEST_HANDLE_64(uint8) bitmap; - uint32_t nr_cpus; + uint32_t nr_bits; }; #endif #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ #endif /* __XEN_PUBLIC_XEN_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/xenoprof.h =================================================================== --- projects/clang370-import/sys/xen/interface/xenoprof.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/xenoprof.h (revision 288926) @@ -1,152 +1,152 @@ /****************************************************************************** * xenoprof.h * * Interface for enabling system wide profiling based on hardware performance * counters * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the 
Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (C) 2005 Hewlett-Packard Co. * Written by Aravind Menon & Jose Renato Santos */ #ifndef __XEN_PUBLIC_XENOPROF_H__ #define __XEN_PUBLIC_XENOPROF_H__ #include "xen.h" /* * Commands to HYPERVISOR_xenoprof_op(). */ #define XENOPROF_init 0 #define XENOPROF_reset_active_list 1 #define XENOPROF_reset_passive_list 2 #define XENOPROF_set_active 3 #define XENOPROF_set_passive 4 #define XENOPROF_reserve_counters 5 #define XENOPROF_counter 6 #define XENOPROF_setup_events 7 #define XENOPROF_enable_virq 8 #define XENOPROF_start 9 #define XENOPROF_stop 10 #define XENOPROF_disable_virq 11 #define XENOPROF_release_counters 12 #define XENOPROF_shutdown 13 #define XENOPROF_get_buffer 14 #define XENOPROF_set_backtrace 15 /* AMD IBS support */ #define XENOPROF_get_ibs_caps 16 #define XENOPROF_ibs_counter 17 #define XENOPROF_last_op 17 #define MAX_OPROF_EVENTS 32 #define MAX_OPROF_DOMAINS 25 #define XENOPROF_CPU_TYPE_SIZE 64 /* Xenoprof performance events (not Xen events) */ struct event_log { uint64_t eip; uint8_t mode; uint8_t event; }; /* PC value that indicates a special code */ #define XENOPROF_ESCAPE_CODE (~0ULL) /* Transient events for the xenoprof->oprofile cpu buf */ #define XENOPROF_TRACE_BEGIN 1 /* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ struct xenoprof_buf { uint32_t 
event_head; uint32_t event_tail; uint32_t event_size; uint32_t vcpu_id; uint64_t xen_samples; uint64_t kernel_samples; uint64_t user_samples; uint64_t lost_samples; struct event_log event_log[1]; }; #ifndef __XEN__ typedef struct xenoprof_buf xenoprof_buf_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); #endif struct xenoprof_init { int32_t num_events; int32_t is_primary; char cpu_type[XENOPROF_CPU_TYPE_SIZE]; }; typedef struct xenoprof_init xenoprof_init_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); struct xenoprof_get_buffer { int32_t max_samples; int32_t nbuf; int32_t bufsize; uint64_t buf_gmaddr; }; typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); struct xenoprof_counter { uint32_t ind; uint64_t count; uint32_t enabled; uint32_t event; uint32_t hypervisor; uint32_t kernel; uint32_t user; uint64_t unit_mask; }; typedef struct xenoprof_counter xenoprof_counter_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); typedef struct xenoprof_passive { uint16_t domain_id; int32_t max_samples; int32_t nbuf; int32_t bufsize; uint64_t buf_gmaddr; } xenoprof_passive_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); struct xenoprof_ibs_counter { uint64_t op_enabled; uint64_t fetch_enabled; uint64_t max_cnt_fetch; uint64_t max_cnt_op; uint64_t rand_en; uint64_t dispatched_ops; }; typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); #endif /* __XEN_PUBLIC_XENOPROF_H__ */ /* * Local variables: * mode: C - * c-set-style: "BSD" + * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ Index: projects/clang370-import/sys/xen/interface/xsm/flask_op.h =================================================================== --- projects/clang370-import/sys/xen/interface/xsm/flask_op.h (revision 288925) +++ projects/clang370-import/sys/xen/interface/xsm/flask_op.h (revision 288926) @@ -1,193 +1,212 @@ /* * This file contains the flask_op hypercall commands 
and definitions. * * Author: George Coker, * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #ifndef __FLASK_OP_H__ #define __FLASK_OP_H__ +#include "../event_channel.h" + #define XEN_FLASK_INTERFACE_VERSION 1 struct xen_flask_load { XEN_GUEST_HANDLE(char) buffer; uint32_t size; }; struct xen_flask_setenforce { uint32_t enforcing; }; struct xen_flask_sid_context { /* IN/OUT: sid to convert to/from string */ uint32_t sid; /* IN: size of the context buffer * OUT: actual size of the output context string */ uint32_t size; XEN_GUEST_HANDLE(char) context; }; struct xen_flask_access { /* IN: access request */ uint32_t ssid; uint32_t tsid; uint32_t tclass; uint32_t req; /* OUT: AVC data */ uint32_t allowed; uint32_t audit_allow; uint32_t audit_deny; uint32_t seqno; }; struct xen_flask_transition { /* IN: transition SIDs and class */ uint32_t ssid; uint32_t tsid; uint32_t tclass; /* OUT: new SID */ uint32_t newsid; }; struct xen_flask_userlist { /* IN: starting SID for list */ uint32_t start_sid; /* IN: size of user string and output buffer * OUT: number of SIDs returned */ uint32_t size; union { /* IN: user to enumerate SIDs */ XEN_GUEST_HANDLE(char) user; /* OUT: SID list */ XEN_GUEST_HANDLE(uint32) sids; } u; }; struct xen_flask_boolean { /* IN/OUT: numeric identifier for boolean [GET/SET] * If -1, name will be used and bool_id will be filled in. 
*/ uint32_t bool_id; /* OUT: current enforcing value of boolean [GET/SET] */ uint8_t enforcing; /* OUT: pending value of boolean [GET/SET] */ uint8_t pending; /* IN: new value of boolean [SET] */ uint8_t new_value; /* IN: commit new value instead of only setting pending [SET] */ uint8_t commit; /* IN: size of boolean name buffer [GET/SET] * OUT: actual size of name [GET only] */ uint32_t size; /* IN: if bool_id is -1, used to find boolean [GET/SET] * OUT: textual name of boolean [GET only] */ XEN_GUEST_HANDLE(char) name; }; struct xen_flask_setavc_threshold { /* IN */ uint32_t threshold; }; struct xen_flask_hash_stats { /* OUT */ uint32_t entries; uint32_t buckets_used; uint32_t buckets_total; uint32_t max_chain_len; }; struct xen_flask_cache_stats { /* IN */ uint32_t cpu; /* OUT */ uint32_t lookups; uint32_t hits; uint32_t misses; uint32_t allocations; uint32_t reclaims; uint32_t frees; }; struct xen_flask_ocontext { /* IN */ uint32_t ocon; uint32_t sid; uint64_t low, high; }; struct xen_flask_peersid { /* IN */ evtchn_port_t evtchn; /* OUT */ uint32_t sid; }; +struct xen_flask_relabel { + /* IN */ + uint32_t domid; + uint32_t sid; +}; + +struct xen_flask_devicetree_label { + /* IN */ + uint32_t sid; + uint32_t length; + XEN_GUEST_HANDLE(char) path; +}; + struct xen_flask_op { uint32_t cmd; #define FLASK_LOAD 1 #define FLASK_GETENFORCE 2 #define FLASK_SETENFORCE 3 #define FLASK_CONTEXT_TO_SID 4 #define FLASK_SID_TO_CONTEXT 5 #define FLASK_ACCESS 6 #define FLASK_CREATE 7 #define FLASK_RELABEL 8 #define FLASK_USER 9 #define FLASK_POLICYVERS 10 #define FLASK_GETBOOL 11 #define FLASK_SETBOOL 12 #define FLASK_COMMITBOOLS 13 #define FLASK_MLS 14 #define FLASK_DISABLE 15 #define FLASK_GETAVC_THRESHOLD 16 #define FLASK_SETAVC_THRESHOLD 17 #define FLASK_AVC_HASHSTATS 18 #define FLASK_AVC_CACHESTATS 19 #define FLASK_MEMBER 20 #define FLASK_ADD_OCONTEXT 21 #define FLASK_DEL_OCONTEXT 22 #define FLASK_GET_PEER_SID 23 +#define FLASK_RELABEL_DOMAIN 24 +#define 
FLASK_DEVICETREE_LABEL 25 uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */ union { struct xen_flask_load load; struct xen_flask_setenforce enforce; /* FLASK_CONTEXT_TO_SID and FLASK_SID_TO_CONTEXT */ struct xen_flask_sid_context sid_context; struct xen_flask_access access; /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */ struct xen_flask_transition transition; struct xen_flask_userlist userlist; /* FLASK_GETBOOL, FLASK_SETBOOL */ struct xen_flask_boolean boolean; struct xen_flask_setavc_threshold setavc_threshold; struct xen_flask_hash_stats hash_stats; struct xen_flask_cache_stats cache_stats; /* FLASK_ADD_OCONTEXT, FLASK_DEL_OCONTEXT */ struct xen_flask_ocontext ocontext; struct xen_flask_peersid peersid; + struct xen_flask_relabel relabel; + struct xen_flask_devicetree_label devicetree_label; } u; }; typedef struct xen_flask_op xen_flask_op_t; DEFINE_XEN_GUEST_HANDLE(xen_flask_op_t); #endif Index: projects/clang370-import/sys/xen/xen_intr.h =================================================================== --- projects/clang370-import/sys/xen/xen_intr.h (revision 288925) +++ projects/clang370-import/sys/xen/xen_intr.h (revision 288926) @@ -1,268 +1,264 @@ /****************************************************************************** * xen_intr.h * * APIs for managing Xen event channel, virtual IRQ, and physical IRQ * notifications. 
* * Copyright (c) 2004, K A Fraser * Copyright (c) 2012, Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _XEN_INTR_H_ #define _XEN_INTR_H_ -#ifndef __XEN_EVTCHN_PORT_DEFINED__ -typedef uint32_t evtchn_port_t; -DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); -#define __XEN_EVTCHN_PORT_DEFINED__ 1 -#endif +#include <xen/interface/event_channel.h> /** Registered Xen interrupt callback handle. */ typedef void * xen_intr_handle_t; /** If non-zero, the hypervisor has been configured to use a direct vector */ extern int xen_vector_callback_enabled; /** * Associate an already allocated local event channel port an interrupt * handler. * * \param dev The device making this bind request. * \param local_port The event channel to bind. * \param filter An interrupt filter handler. Specify NULL * to always dispatch to the ithread handler. 
* \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); /** * Allocate a local event channel port, accessible by the specified * remote/foreign domain and, if successful, associate the port with * the specified interrupt handler. * * \param dev The device making this bind request. * \param remote_domain Remote domain grant permission to signal the * newly allocated local port. * \param filter An interrupt filter handler. Specify NULL * to always dispatch to the ithread handler. * \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ int xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); /** * Associate the specified interrupt handler with the remote event * channel port specified by remote_domain and remote_port. * * \param dev The device making this bind request. * \param remote_domain The domain peer for this event channel connection. * \param remote_port Remote domain's local port number for this event * channel port. * \param filter An interrupt filter handler. 
Specify NULL * to always dispatch to the ithread handler. * \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, evtchn_port_t remote_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); /** * Associate the specified interrupt handler with the specified Xen * virtual interrupt source. * * \param dev The device making this bind request. * \param virq The Xen virtual IRQ number for the Xen interrupt * source being hooked. * \param cpu The cpu on which interrupt events should be delivered. * \param filter An interrupt filter handler. Specify NULL * to always dispatch to the ithread handler. * \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); /** * Allocate a local event channel port for servicing interprocessor * interupts and, if successful, associate the port with the specified * interrupt handler. * * \param dev The device making this bind request. * \param cpu The cpu receiving the IPI. * \param filter The interrupt filter servicing this IPI. * \param irqflags Interrupt handler flags. See sys/bus.h. 
* \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ int xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, driver_filter_t filter, enum intr_type irqflags, xen_intr_handle_t *handlep); /** * Register a physical interrupt vector and setup the interrupt source. * * \param vector The global vector to use. * \param trig Default trigger method. * \param pol Default polarity of the interrupt. * * \returns 0 on success, otherwise an errno. */ int xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol); /** * Unbind an interrupt handler from its interrupt source. * * \param handlep A pointer to the opaque handle that was initialized * at the time the interrupt source was bound. * * \returns 0 on success, otherwise an errno. * * \note The event channel, if any, that was allocated at bind time is * closed upon successful return of this method. * * \note It is always safe to call xen_intr_unbind() on a handle that * has been initilized to NULL. */ void xen_intr_unbind(xen_intr_handle_t *handle); /** * Add a description to an interrupt handler. * * \param handle The opaque handle that was initialized at the time * the interrupt source was bound. * * \param fmt The sprintf compatible format string for the description, * followed by optional sprintf arguments. * * \returns 0 on success, otherwise an errno. */ int xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) __attribute__((format(printf, 2, 3))); /** * Signal the remote peer of an interrupt source associated with an * event channel port. * * \param handle The opaque handle that was initialized at the time * the interrupt source was bound. * * \note For xen interrupt sources other than event channel ports, * this method takes no action. */ void xen_intr_signal(xen_intr_handle_t handle); /** * Get the local event channel port number associated with this interrupt * source. 
* * \param handle The opaque handle that was initialized at the time * the interrupt source was bound. * * \returns 0 if the handle is invalid, otherwise positive port number. */ evtchn_port_t xen_intr_port(xen_intr_handle_t handle); /** * Setup MSI vector interrupt(s). * * \param dev The device that requests the binding. * * \param vector Requested initial vector to bind the MSI interrupt(s) to. * * \param count Number of vectors to allocate. * * \returns 0 on success, otherwise an errno. */ int xen_register_msi(device_t dev, int vector, int count); /** * Teardown a MSI vector interrupt. * * \param vector Requested vector to release. * * \returns 0 on success, otherwise an errno. */ int xen_release_msi(int vector); /** * Bind an event channel port with a handler * * \param dev The device making this bind request. * \param filter An interrupt filter handler. Specify NULL * to always dispatch to the ithread handler. * \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handle Opaque handle used to manage this registration. * * \returns 0 on success, otherwise an errno. 
*/ int xen_intr_add_handler(device_t dev, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t handle); #endif /* _XEN_INTR_H_ */ Index: projects/clang370-import/sys =================================================================== --- projects/clang370-import/sys (revision 288925) +++ projects/clang370-import/sys (revision 288926) Property changes on: projects/clang370-import/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r288831-288925 Index: projects/clang370-import/usr.bin/Makefile =================================================================== --- projects/clang370-import/usr.bin/Makefile (revision 288925) +++ projects/clang370-import/usr.bin/Makefile (revision 288926) @@ -1,420 +1,299 @@ # From: @(#)Makefile 8.3 (Berkeley) 1/7/94 # $FreeBSD$ .include # XXX MISSING: deroff diction graph learn plot # spell spline struct xsend # XXX Use GNU versions: diff ld patch # Moved to secure: bdes # SUBDIR= alias \ apply \ asa \ awk \ banner \ basename \ brandelf \ bsdiff \ bzip2 \ bzip2recover \ cap_mkdb \ chat \ chpass \ cksum \ cmp \ col \ colldef \ colrm \ column \ comm \ compress \ cpuset \ csplit \ ctlstat \ cut \ dirname \ dpv \ du \ elf2aout \ elfdump \ enigma \ env \ expand \ false \ fetch \ find \ fmt \ fold \ fstat \ fsync \ gcore \ gencat \ getconf \ getent \ getopt \ grep \ gzip \ head \ hexdump \ id \ ident \ ipcrm \ ipcs \ join \ jot \ keylogin \ keylogout \ killall \ ktrace \ ktrdump \ lam \ lastcomm \ ldd \ leave \ less \ lessecho \ lesskey \ limits \ locale \ lock \ lockf \ logger \ login \ logins \ logname \ look \ lorder \ lsvfs \ lzmainfo \ m4 \ mandoc \ mesg \ minigzip \ ministat \ mkdep \ mkfifo \ mkimg \ mklocale \ mktemp \ mkulzma \ mkuzip \ mt \ ncal \ netstat \ newgrp \ nfsstat \ nice \ nl \ numactl \ nohup \ opieinfo \ opiekey \ opiepasswd \ pagesize \ passwd \ paste \ patch \ pathchk \ perror \ pr \ printenv \ 
printf \ procstat \ protect \ rctl \ renice \ rev \ revoke \ rpcinfo \ rs \ rup \ rusers \ rwall \ script \ sed \ send-pr \ seq \ shar \ showmount \ sockstat \ soelim \ sort \ split \ stat \ stdbuf \ su \ systat \ tabs \ tail \ tar \ tcopy \ tee \ time \ timeout \ tip \ top \ touch \ tput \ tr \ true \ truncate \ tset \ tsort \ tty \ uname \ unexpand \ uniq \ unzip \ units \ unvis \ uudecode \ uuencode \ vis \ vmstat \ w \ wall \ wc \ what \ whereis \ which \ whois \ write \ xargs \ xinstall \ xo \ xz \ xzdec \ yes # NB: keep these sorted by MK_* knobs -.if ${MK_AT} != "no" -SUBDIR+= at -.endif - -.if ${MK_ATM} != "no" -SUBDIR+= atm -.endif - -.if ${MK_BLUETOOTH} != "no" -SUBDIR+= bluetooth -.endif - -.if ${MK_BSD_CPIO} != "no" -SUBDIR+= cpio -.endif - -.if ${MK_CALENDAR} != "no" -SUBDIR+= calendar -.endif - -.if ${MK_CLANG} != "no" -SUBDIR+= clang -.endif - -.if ${MK_EE} != "no" -SUBDIR+= ee -.endif - -.if ${MK_FILE} != "no" -SUBDIR+= file -.endif - -.if ${MK_FINGER} != "no" -SUBDIR+= finger -.endif - -.if ${MK_FTP} != "no" -SUBDIR+= ftp -.endif - -.if ${MK_GAMES} != "no" -SUBDIR+= caesar -SUBDIR+= factor -SUBDIR+= fortune -SUBDIR+= grdc -SUBDIR+= morse -SUBDIR+= number -SUBDIR+= pom -SUBDIR+= primes -SUBDIR+= random -.endif - -.if ${MK_GPL_DTC} != "yes" -SUBDIR+= dtc -.endif - -.if ${MK_GROFF} != "no" -SUBDIR+= vgrind -.endif - -.if ${MK_HESIOD} != "no" -SUBDIR+= hesinfo -.endif - -.if ${MK_ICONV} != "no" -SUBDIR+= iconv -SUBDIR+= mkcsmapper -SUBDIR+= mkesdb -.endif - -.if ${MK_ISCSI} != "no" -SUBDIR+= iscsictl -.endif - -.if ${MK_KDUMP} != "no" -SUBDIR+= kdump -SUBDIR+= truss -.endif - -.if ${MK_KERBEROS_SUPPORT} != "no" -SUBDIR+= compile_et -.endif - -.if ${MK_LDNS_UTILS} != "no" -SUBDIR+= drill -SUBDIR+= host -.endif - -.if ${MK_LOCATE} != "no" -SUBDIR+= locate -.endif - +SUBDIR.${MK_AT}+= at +SUBDIR.${MK_ATM}+= atm +SUBDIR.${MK_BLUETOOTH}+= bluetooth +SUBDIR.${MK_BSD_CPIO}+= cpio +SUBDIR.${MK_CALENDAR}+= calendar +SUBDIR.${MK_CLANG}+= clang +SUBDIR.${MK_EE}+= 
ee +SUBDIR.${MK_FILE}+= file +SUBDIR.${MK_FINGER}+= finger +SUBDIR.${MK_FTP}+= ftp +SUBDIR.${MK_GAMES}+= caesar +SUBDIR.${MK_GAMES}+= factor +SUBDIR.${MK_GAMES}+= fortune +SUBDIR.${MK_GAMES}+= grdc +SUBDIR.${MK_GAMES}+= morse +SUBDIR.${MK_GAMES}+= number +SUBDIR.${MK_GAMES}+= pom +SUBDIR.${MK_GAMES}+= primes +SUBDIR.${MK_GAMES}+= random +SUBDIR.${MK_GPL_DTC}+= dtc +SUBDIR.${MK_GROFF}+= vgrind +SUBDIR.${MK_HESIOD}+= hesinfo +SUBDIR.${MK_ICONV}+= iconv +SUBDIR.${MK_ICONV}+= mkcsmapper +SUBDIR.${MK_ICONV}+= mkesdb +SUBDIR.${MK_ISCSI}+= iscsictl +SUBDIR.${MK_KDUMP}+= kdump +SUBDIR.${MK_KDUMP}+= truss +SUBDIR.${MK_KERBEROS_SUPPORT}+= compile_et +SUBDIR.${MK_LDNS_UTILS}+= drill +SUBDIR.${MK_LDNS_UTILS}+= host +SUBDIR.${MK_LOCATE}+= locate # XXX msgs? -.if ${MK_MAIL} != "no" -SUBDIR+= biff -SUBDIR+= from -SUBDIR+= mail -SUBDIR+= msgs +SUBDIR.${MK_MAIL}+= biff +SUBDIR.${MK_MAIL}+= from +SUBDIR.${MK_MAIL}+= mail +SUBDIR.${MK_MAIL}+= msgs +SUBDIR.${MK_MAKE}+= bmake +SUBDIR.${MK_MAN_UTILS}+= catman +.if ${MK_MANDOCDB} == "no" # AND +SUBDIR.${MK_MAN_UTILS}+= makewhatis .endif - -.if ${MK_MAKE} != "no" -SUBDIR+= bmake -.endif - -.if ${MK_MAN_UTILS} != "no" -SUBDIR+= catman -.if ${MK_MANDOCDB} == "no" -SUBDIR+= makewhatis -.endif -SUBDIR+= man -.endif - -.if ${MK_NETCAT} != "no" -SUBDIR+= nc -.endif - -.if ${MK_NIS} != "no" -SUBDIR+= ypcat -SUBDIR+= ypmatch -SUBDIR+= ypwhich -.endif - -.if ${MK_OPENSSH} != "no" -SUBDIR+= ssh-copy-id -.endif - -.if ${MK_OPENSSL} != "no" -SUBDIR+= bc -SUBDIR+= chkey -SUBDIR+= dc -SUBDIR+= newkey -.endif - -.if ${MK_QUOTAS} != "no" -SUBDIR+= quota -.endif - -.if ${MK_RCMDS} != "no" -SUBDIR+= rlogin -SUBDIR+= rsh -SUBDIR+= ruptime -SUBDIR+= rwho -.endif - -.if ${MK_SENDMAIL} != "no" -SUBDIR+= vacation -.endif - -.if ${MK_TALK} != "no" -SUBDIR+= talk -.endif - -.if ${MK_TELNET} != "no" -SUBDIR+= telnet -.endif - -.if ${MK_TESTS} != "no" -SUBDIR+= tests -.endif - -.if ${MK_TEXTPROC} != "no" -SUBDIR+= checknr -SUBDIR+= colcrt -SUBDIR+= ul -.endif - 
-.if ${MK_TFTP} != "no" -SUBDIR+= tftp -.endif - -.if ${MK_TOOLCHAIN} != "no" -SUBDIR+= addr2line -SUBDIR+= ar -SUBDIR+= c89 -SUBDIR+= c99 -SUBDIR+= ctags -SUBDIR+= cxxfilt -SUBDIR+= elfcopy -SUBDIR+= file2c +SUBDIR.${MK_MAN_UTILS}+= man +SUBDIR.${MK_NETCAT}+= nc +SUBDIR.${MK_NIS}+= ypcat +SUBDIR.${MK_NIS}+= ypmatch +SUBDIR.${MK_NIS}+= ypwhich +SUBDIR.${MK_OPENSSH}+= ssh-copy-id +SUBDIR.${MK_OPENSSL}+= bc +SUBDIR.${MK_OPENSSL}+= chkey +SUBDIR.${MK_OPENSSL}+= dc +SUBDIR.${MK_OPENSSL}+= newkey +SUBDIR.${MK_QUOTAS}+= quota +SUBDIR.${MK_RCMDS}+= rlogin +SUBDIR.${MK_RCMDS}+= rsh +SUBDIR.${MK_RCMDS}+= ruptime +SUBDIR.${MK_RCMDS}+= rwho +SUBDIR.${MK_SENDMAIL}+= vacation +SUBDIR.${MK_TALK}+= talk +SUBDIR.${MK_TELNET}+= telnet +SUBDIR.${MK_TESTS}+= tests +SUBDIR.${MK_TEXTPROC}+= checknr +SUBDIR.${MK_TEXTPROC}+= colcrt +SUBDIR.${MK_TEXTPROC}+= ul +SUBDIR.${MK_TFTP}+= tftp +SUBDIR.${MK_TOOLCHAIN}+= addr2line +SUBDIR.${MK_TOOLCHAIN}+= ar +SUBDIR.${MK_TOOLCHAIN}+= c89 +SUBDIR.${MK_TOOLCHAIN}+= c99 +SUBDIR.${MK_TOOLCHAIN}+= ctags +SUBDIR.${MK_TOOLCHAIN}+= cxxfilt +SUBDIR.${MK_TOOLCHAIN}+= elfcopy +SUBDIR.${MK_TOOLCHAIN}+= file2c .if ${MACHINE_ARCH} != "aarch64" # ARM64TODO gprof does not build -SUBDIR+= gprof +SUBDIR.${MK_TOOLCHAIN}+= gprof .endif -SUBDIR+= indent -SUBDIR+= lex -SUBDIR+= mkstr -SUBDIR+= nm -SUBDIR+= readelf -SUBDIR+= rpcgen -SUBDIR+= unifdef -SUBDIR+= size -SUBDIR+= strings +SUBDIR.${MK_TOOLCHAIN}+= indent +SUBDIR.${MK_TOOLCHAIN}+= lex +SUBDIR.${MK_TOOLCHAIN}+= mkstr +SUBDIR.${MK_TOOLCHAIN}+= nm +SUBDIR.${MK_TOOLCHAIN}+= readelf +SUBDIR.${MK_TOOLCHAIN}+= rpcgen +SUBDIR.${MK_TOOLCHAIN}+= unifdef +SUBDIR.${MK_TOOLCHAIN}+= size +SUBDIR.${MK_TOOLCHAIN}+= strings .if ${MACHINE_ARCH} != "aarch64" # ARM64TODO xlint does not build -SUBDIR+= xlint +SUBDIR.${MK_TOOLCHAIN}+= xlint .endif -SUBDIR+= xstr -SUBDIR+= yacc -.endif +SUBDIR.${MK_TOOLCHAIN}+= xstr +SUBDIR.${MK_TOOLCHAIN}+= yacc +SUBDIR.${MK_VI}+= vi +SUBDIR.${MK_VT}+= vtfontcvt +SUBDIR.${MK_USB}+= usbhidaction 
+SUBDIR.${MK_USB}+= usbhidctl +SUBDIR.${MK_UTMPX}+= last +SUBDIR.${MK_UTMPX}+= users +SUBDIR.${MK_UTMPX}+= who +SUBDIR.${MK_SVN}+= svn +SUBDIR.${MK_SVNLITE}+= svn -.if ${MK_VI} != "no" -SUBDIR+= vi -.endif - -.if ${MK_VT} != "no" -SUBDIR+= vtfontcvt -.endif - -.if ${MK_USB} != "no" -SUBDIR+= usbhidaction -SUBDIR+= usbhidctl -.endif - -.if ${MK_UTMPX} != "no" -SUBDIR+= last -SUBDIR+= users -SUBDIR+= who -.endif - -.if ${MK_SVN} == "yes" || ${MK_SVNLITE} == "yes" -SUBDIR+= svn -.endif - .include -SUBDIR:= ${SUBDIR:O} +SUBDIR:= ${SUBDIR:O:u} SUBDIR_PARALLEL= .include Index: projects/clang370-import/usr.bin/truss/setup.c =================================================================== --- projects/clang370-import/usr.bin/truss/setup.c (revision 288925) +++ projects/clang370-import/usr.bin/truss/setup.c (revision 288926) @@ -1,595 +1,592 @@ /*- * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Various setup functions for truss. Not the cleanest-written code, * I'm afraid. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "syscall.h" #include "extern.h" SET_DECLARE(procabi, struct procabi); static sig_atomic_t detaching; static void new_proc(struct trussinfo *, pid_t); /* * setup_and_wait() is called to start a process. All it really does * is fork(), enable tracing in the child, and then exec the given * command. At that point, the child process stops, and the parent * can wake up and deal with it. */ void setup_and_wait(struct trussinfo *info, char *command[]) { pid_t pid; pid = vfork(); if (pid == -1) err(1, "fork failed"); if (pid == 0) { /* Child */ ptrace(PT_TRACE_ME, 0, 0, 0); execvp(command[0], command); err(1, "execvp %s", command[0]); } /* Only in the parent here */ if (waitpid(pid, NULL, 0) < 0) err(1, "unexpect stop in waitpid"); new_proc(info, pid); } /* * start_tracing is called to attach to an existing process. */ void start_tracing(struct trussinfo *info, pid_t pid) { int ret, retry; retry = 10; do { ret = ptrace(PT_ATTACH, pid, NULL, 0); usleep(200); } while (ret && retry-- > 0); if (ret) err(1, "can not attach to target process"); if (waitpid(pid, NULL, 0) < 0) err(1, "Unexpect stop in waitpid"); new_proc(info, pid); } /* * Restore a process back to it's pre-truss state. 
* Called for SIGINT, SIGTERM, SIGQUIT. This only * applies if truss was told to monitor an already-existing * process. */ void restore_proc(int signo __unused) { detaching = 1; } static void detach_proc(pid_t pid) { /* stop the child so that we can detach */ kill(pid, SIGSTOP); if (waitpid(pid, NULL, 0) < 0) err(1, "Unexpected stop in waitpid"); if (ptrace(PT_DETACH, pid, (caddr_t)1, 0) < 0) err(1, "Can not detach the process"); kill(pid, SIGCONT); } /* * Determine the ABI. This is called after every exec, and when * a process is first monitored. */ static struct procabi * find_abi(pid_t pid) { struct procabi **pabi; size_t len; int error; int mib[4]; char progt[32]; len = sizeof(progt); mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_SV_NAME; mib[3] = pid; error = sysctl(mib, 4, progt, &len, NULL, 0); if (error != 0) err(2, "can not get sysvec name"); SET_FOREACH(pabi, procabi) { if (strcmp((*pabi)->type, progt) == 0) return (*pabi); } warnx("ABI %s for pid %ld is not supported", progt, (long)pid); return (NULL); } static void new_proc(struct trussinfo *info, pid_t pid) { struct procinfo *np; /* * If this happens it means there is a bug in truss. Unfortunately * this will kill any processes are attached to. 
*/ LIST_FOREACH(np, &info->proclist, entries) { if (np->pid == pid) errx(1, "Duplicate process for pid %ld", (long)pid); } if (info->flags & FOLLOWFORKS) if (ptrace(PT_FOLLOW_FORK, pid, NULL, 1) == -1) err(1, "Unable to follow forks for pid %ld", (long)pid); np = calloc(1, sizeof(struct procinfo)); np->pid = pid; np->abi = find_abi(pid); SLIST_INIT(&np->threadlist); LIST_INSERT_HEAD(&info->proclist, np, entries); } static void free_proc(struct procinfo *p) { struct threadinfo *t, *t2; SLIST_FOREACH_SAFE(t, &p->threadlist, entries, t2) { free(t); } LIST_REMOVE(p, entries); free(p); } static void detach_all_procs(struct trussinfo *info) { struct procinfo *p, *p2; LIST_FOREACH_SAFE(p, &info->proclist, entries, p2) { detach_proc(p->pid); free_proc(p); } } static struct procinfo * find_proc(struct trussinfo *info, pid_t pid) { struct procinfo *np; LIST_FOREACH(np, &info->proclist, entries) { if (np->pid == pid) return (np); } return (NULL); } /* * Change curthread member based on (pid, lwpid). * If it is a new thread, create a threadinfo structure. */ static void find_thread(struct trussinfo *info, pid_t pid, lwpid_t lwpid) { struct procinfo *np; struct threadinfo *nt; np = find_proc(info, pid); assert(np != NULL); SLIST_FOREACH(nt, &np->threadlist, entries) { if (nt->tid == lwpid) { info->curthread = nt; return; } } nt = calloc(1, sizeof(struct threadinfo)); if (nt == NULL) err(1, "calloc() failed"); nt->proc = np; nt->tid = lwpid; SLIST_INSERT_HEAD(&np->threadlist, nt, entries); info->curthread = nt; } /* * When a process exits, it no longer has any threads left. However, * the main loop expects a valid curthread. In cases when a thread * triggers the termination (e.g. calling exit or triggering a fault) * we would ideally use that thread. However, if a process is killed * by a signal sent from another process then there is no "correct" * thread. We just punt and use the first thread. 
*/ static void find_exit_thread(struct trussinfo *info, pid_t pid) { struct procinfo *np; struct threadinfo *nt; np = find_proc(info, pid); assert(np != NULL); if (SLIST_EMPTY(&np->threadlist)) { /* * If an existing process exits right after we attach * to it but before it posts any events, there won't * be any threads. Create a dummy thread and set its * "before" time to the global start time. */ nt = calloc(1, sizeof(struct threadinfo)); if (nt == NULL) err(1, "calloc() failed"); nt->proc = np; nt->tid = 0; SLIST_INSERT_HEAD(&np->threadlist, nt, entries); nt->before = info->start_time; } info->curthread = SLIST_FIRST(&np->threadlist); } static void alloc_syscall(struct threadinfo *t, struct ptrace_lwpinfo *pl) { u_int i; assert(t->in_syscall == 0); assert(t->cs.number == 0); assert(t->cs.name == NULL); assert(t->cs.nargs == 0); for (i = 0; i < nitems(t->cs.s_args); i++) assert(t->cs.s_args[i] == NULL); memset(t->cs.args, 0, sizeof(t->cs.args)); t->cs.number = pl->pl_syscall_code; t->in_syscall = 1; } static void free_syscall(struct threadinfo *t) { u_int i; for (i = 0; i < t->cs.nargs; i++) free(t->cs.s_args[i]); memset(&t->cs, 0, sizeof(t->cs)); t->in_syscall = 0; } static void enter_syscall(struct trussinfo *info, struct ptrace_lwpinfo *pl) { struct threadinfo *t; struct syscall *sc; u_int i, narg; t = info->curthread; alloc_syscall(t, pl); narg = MIN(pl->pl_syscall_narg, nitems(t->cs.args)); if (narg != 0 && t->proc->abi->fetch_args(info, narg) != 0) { free_syscall(t); return; } if (t->cs.number >= 0 && t->cs.number < t->proc->abi->nsyscalls) t->cs.name = t->proc->abi->syscallnames[t->cs.number]; if (t->cs.name == NULL) fprintf(info->outfile, "-- UNKNOWN %s SYSCALL %d --\n", t->proc->abi->type, t->cs.number); sc = get_syscall(t->cs.name, narg); t->cs.nargs = sc->nargs; assert(sc->nargs <= nitems(t->cs.s_args)); t->cs.sc = sc; /* * At this point, we set up the system call arguments. 
* We ignore any OUT ones, however -- those are arguments that * are set by the system call, and so are probably meaningless * now. This doesn't currently support arguments that are * passed in *and* out, however. */ if (t->cs.name != NULL) { #if DEBUG fprintf(stderr, "syscall %s(", t->cs.name); #endif for (i = 0; i < t->cs.nargs; i++) { #if DEBUG fprintf(stderr, "0x%lx%s", sc ? t->cs.args[sc->args[i].offset] : t->cs.args[i], i < (t->cs.nargs - 1) ? "," : ""); #endif if (!(sc->args[i].type & OUT)) { t->cs.s_args[i] = print_arg(&sc->args[i], t->cs.args, 0, info); } } #if DEBUG fprintf(stderr, ")\n"); #endif } clock_gettime(CLOCK_REALTIME, &t->before); } static void exit_syscall(struct trussinfo *info, struct ptrace_lwpinfo *pl) { struct threadinfo *t; struct procinfo *p; struct syscall *sc; long retval[2]; u_int i; int errorp; t = info->curthread; if (!t->in_syscall) return; clock_gettime(CLOCK_REALTIME, &t->after); p = t->proc; if (p->abi->fetch_retval(info, retval, &errorp) < 0) { free_syscall(t); return; } sc = t->cs.sc; /* * Here, we only look for arguments that have OUT masked in -- * otherwise, they were handled in enter_syscall(). */ for (i = 0; i < sc->nargs; i++) { char *temp; if (sc->args[i].type & OUT) { /* * If an error occurred, then don't bother * getting the data; it may not be valid. */ if (errorp) { asprintf(&temp, "0x%lx", t->cs.args[sc->args[i].offset]); } else { temp = print_arg(&sc->args[i], t->cs.args, retval, info); } t->cs.s_args[i] = temp; } } print_syscall_ret(info, t->cs.name, t->cs.nargs, t->cs.s_args, errorp, retval, sc); free_syscall(t); /* * If the process executed a new image, check the ABI. If the * new ABI isn't supported, stop tracing this process. 
*/ if (pl->pl_flags & PL_FLAG_EXEC) { p->abi = find_abi(p->pid); if (p->abi == NULL) { if (ptrace(PT_DETACH, p->pid, (caddr_t)1, 0) < 0) err(1, "Can not detach the process"); free_proc(p); } } } static void report_exit(struct trussinfo *info, siginfo_t *si) { struct timespec timediff; if (info->flags & FOLLOWFORKS) fprintf(info->outfile, "%5d: ", si->si_pid); clock_gettime(CLOCK_REALTIME, &info->curthread->after); if (info->flags & ABSOLUTETIMESTAMPS) { timespecsubt(&info->curthread->after, &info->start_time, &timediff); fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } if (info->flags & RELATIVETIMESTAMPS) { timespecsubt(&info->curthread->after, &info->curthread->before, &timediff); fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } if (si->si_code == CLD_EXITED) fprintf(info->outfile, "process exit, rval = %u\n", si->si_status); else fprintf(info->outfile, "process killed, signal = %u%s\n", si->si_status, si->si_code == CLD_DUMPED ? 
" (core dumped)" : ""); } static void report_new_child(struct trussinfo *info, pid_t pid) { struct timespec timediff; clock_gettime(CLOCK_REALTIME, &info->curthread->after); assert(info->flags & FOLLOWFORKS); fprintf(info->outfile, "%5d: ", pid); if (info->flags & ABSOLUTETIMESTAMPS) { timespecsubt(&info->curthread->after, &info->start_time, &timediff); fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } if (info->flags & RELATIVETIMESTAMPS) { timediff.tv_sec = 0; timediff.tv_nsec = 0; fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } fprintf(info->outfile, "\n"); } static void report_signal(struct trussinfo *info, siginfo_t *si) { struct timespec timediff; char *signame; if (info->flags & FOLLOWFORKS) fprintf(info->outfile, "%5d: ", si->si_pid); if (info->flags & ABSOLUTETIMESTAMPS) { timespecsubt(&info->curthread->after, &info->start_time, &timediff); fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } if (info->flags & RELATIVETIMESTAMPS) { timespecsubt(&info->curthread->after, &info->curthread->before, &timediff); fprintf(info->outfile, "%jd.%09ld ", (intmax_t)timediff.tv_sec, timediff.tv_nsec); } signame = strsig(si->si_status); fprintf(info->outfile, "SIGNAL %u (%s)\n", si->si_status, signame == NULL ? "?" : signame); } /* * Wait for events until all the processes have exited or truss has been * asked to stop. 
*/ void eventloop(struct trussinfo *info) { struct ptrace_lwpinfo pl; siginfo_t si; int pending_signal; while (!LIST_EMPTY(&info->proclist)) { if (detaching) { detach_all_procs(info); return; } if (waitid(P_ALL, 0, &si, WTRAPPED | WEXITED) == -1) { if (errno == EINTR) continue; err(1, "Unexpected error from waitid"); } assert(si.si_signo == SIGCHLD); switch (si.si_code) { case CLD_EXITED: case CLD_KILLED: case CLD_DUMPED: find_exit_thread(info, si.si_pid); if ((info->flags & COUNTONLY) == 0) report_exit(info, &si); free_proc(info->curthread->proc); info->curthread = NULL; break; case CLD_TRAPPED: if (ptrace(PT_LWPINFO, si.si_pid, (caddr_t)&pl, sizeof(pl)) == -1) err(1, "ptrace(PT_LWPINFO)"); if (pl.pl_flags & PL_FLAG_CHILD) { new_proc(info, si.si_pid); assert(LIST_FIRST(&info->proclist)->abi != NULL); } find_thread(info, si.si_pid, pl.pl_lwpid); - if (si.si_status == SIGTRAP) { + if (si.si_status == SIGTRAP && + (pl.pl_flags & (PL_FLAG_SCE|PL_FLAG_SCX)) != 0) { if (pl.pl_flags & PL_FLAG_SCE) enter_syscall(info, &pl); else if (pl.pl_flags & PL_FLAG_SCX) exit_syscall(info, &pl); - else - errx(1, - "pl_flags %x contains neither PL_FLAG_SCE nor PL_FLAG_SCX", - pl.pl_flags); pending_signal = 0; } else if (pl.pl_flags & PL_FLAG_CHILD) { if ((info->flags & COUNTONLY) == 0) report_new_child(info, si.si_pid); pending_signal = 0; } else { if ((info->flags & NOSIGS) == 0) report_signal(info, &si); pending_signal = si.si_status; } ptrace(PT_SYSCALL, si.si_pid, (caddr_t)1, pending_signal); break; case CLD_STOPPED: errx(1, "waitid reported CLD_STOPPED"); case CLD_CONTINUED: break; } } } Index: projects/clang370-import/usr.bin/xo/Makefile =================================================================== --- projects/clang370-import/usr.bin/xo/Makefile (revision 288925) +++ projects/clang370-import/usr.bin/xo/Makefile (revision 288926) @@ -1,15 +1,15 @@ # $FreeBSD$ -LIBXO= ${.CURDIR:H:H}/contrib/libxo +LIBXOSRC= ${SRCTOP}/contrib/libxo -.PATH: ${LIBXO}/xo +.PATH: ${LIBXOSRC}/xo 
PROG= xo MAN= xo.1 # XXX For xoversion.h -CFLAGS+=-I${LIBXO}/libxo +CFLAGS+=-I${LIBXOSRC}/libxo LIBADD= xo util .include Index: projects/clang370-import/usr.sbin/rpcbind/rpcb_svc_com.c =================================================================== --- projects/clang370-import/usr.sbin/rpcbind/rpcb_svc_com.c (revision 288925) +++ projects/clang370-import/usr.sbin/rpcbind/rpcb_svc_com.c (revision 288926) @@ -1,1484 +1,1486 @@ /* $NetBSD: rpcb_svc_com.c,v 1.9 2002/11/08 00:16:39 fvdl Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986 - 1991 by Sun Microsystems, Inc. */ /* #ident "@(#)rpcb_svc_com.c 1.18 94/05/02 SMI" */ /* * rpcb_svc_com.c * The commom server procedure for the rpcbind. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PORTMAP #include #include #endif /* PORTMAP */ #include #include #include "rpcbind.h" #define RPC_BUF_MAX 65536 /* can be raised if required */ static char *nullstring = ""; static int rpcb_rmtcalls; struct rmtcallfd_list { int fd; SVCXPRT *xprt; char *netid; struct rmtcallfd_list *next; }; #define NFORWARD 64 #define MAXTIME_OFF 300 /* 5 minutes */ struct finfo { int flag; #define FINFO_ACTIVE 0x1 u_int32_t caller_xid; struct netbuf *caller_addr; u_int32_t forward_xid; int forward_fd; char *uaddr; rpcproc_t reply_type; rpcvers_t versnum; time_t time; }; static struct finfo FINFO[NFORWARD]; static bool_t xdr_encap_parms(XDR *, struct encap_parms *); static bool_t xdr_rmtcall_args(XDR *, struct r_rmtcall_args *); static bool_t xdr_rmtcall_result(XDR *, struct r_rmtcall_args *); static bool_t xdr_opaque_parms(XDR *, struct r_rmtcall_args *); static int find_rmtcallfd_by_netid(char *); static SVCXPRT *find_rmtcallxprt_by_fd(int); static int forward_register(u_int32_t, struct netbuf *, int, char *, rpcproc_t, rpcvers_t, u_int32_t *); static struct finfo *forward_find(u_int32_t); static int free_slot_by_xid(u_int32_t); static int 
free_slot_by_index(int); static int netbufcmp(struct netbuf *, struct netbuf *); static struct netbuf *netbufdup(struct netbuf *); static void netbuffree(struct netbuf *); static int check_rmtcalls(struct pollfd *, int); static void xprt_set_caller(SVCXPRT *, struct finfo *); static void send_svcsyserr(SVCXPRT *, struct finfo *); static void handle_reply(int, SVCXPRT *); static void find_versions(rpcprog_t, char *, rpcvers_t *, rpcvers_t *); static rpcblist_ptr find_service(rpcprog_t, rpcvers_t, char *); static char *getowner(SVCXPRT *, char *, size_t); static int add_pmaplist(RPCB *); static int del_pmaplist(RPCB *); /* * Set a mapping of program, version, netid */ /* ARGSUSED */ void * rpcbproc_set_com(void *arg, struct svc_req *rqstp __unused, SVCXPRT *transp, rpcvers_t rpcbversnum) { RPCB *regp = (RPCB *)arg; static bool_t ans; char owner[64]; #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "RPCB_SET request for (%lu, %lu, %s, %s) : ", (unsigned long)regp->r_prog, (unsigned long)regp->r_vers, regp->r_netid, regp->r_addr); #endif ans = map_set(regp, getowner(transp, owner, sizeof owner)); #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "%s\n", ans == TRUE ? "succeeded" : "failed"); #endif /* XXX: should have used some defined constant here */ rpcbs_set(rpcbversnum - 2, ans); return (void *)&ans; } bool_t map_set(RPCB *regp, char *owner) { RPCB reg, *a; rpcblist_ptr rbl, fnd; reg = *regp; /* * check to see if already used * find_service returns a hit even if * the versions don't match, so check for it */ fnd = find_service(reg.r_prog, reg.r_vers, reg.r_netid); if (fnd && (fnd->rpcb_map.r_vers == reg.r_vers)) { if (!strcmp(fnd->rpcb_map.r_addr, reg.r_addr)) /* * if these match then it is already * registered so just say "OK". 
*/ return (TRUE); else return (FALSE); } /* * add to the end of the list */ rbl = malloc(sizeof (RPCBLIST)); if (rbl == NULL) return (FALSE); a = &(rbl->rpcb_map); a->r_prog = reg.r_prog; a->r_vers = reg.r_vers; a->r_netid = strdup(reg.r_netid); a->r_addr = strdup(reg.r_addr); a->r_owner = strdup(owner); if (!a->r_addr || !a->r_netid || !a->r_owner) { if (a->r_netid) free(a->r_netid); if (a->r_addr) free(a->r_addr); if (a->r_owner) free(a->r_owner); free(rbl); return (FALSE); } rbl->rpcb_next = (rpcblist_ptr)NULL; if (list_rbl == NULL) { list_rbl = rbl; } else { for (fnd = list_rbl; fnd->rpcb_next; fnd = fnd->rpcb_next) ; fnd->rpcb_next = rbl; } #ifdef PORTMAP (void) add_pmaplist(regp); #endif return (TRUE); } /* * Unset a mapping of program, version, netid */ /* ARGSUSED */ void * rpcbproc_unset_com(void *arg, struct svc_req *rqstp __unused, SVCXPRT *transp, rpcvers_t rpcbversnum) { RPCB *regp = (RPCB *)arg; static bool_t ans; char owner[64]; #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "RPCB_UNSET request for (%lu, %lu, %s) : ", (unsigned long)regp->r_prog, (unsigned long)regp->r_vers, regp->r_netid); #endif ans = map_unset(regp, getowner(transp, owner, sizeof owner)); #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "%s\n", ans == TRUE ? "succeeded" : "failed"); #endif /* XXX: should have used some defined constant here */ rpcbs_unset(rpcbversnum - 2, ans); return (void *)&ans; } bool_t map_unset(RPCB *regp, char *owner) { int ans = 0; rpcblist_ptr rbl, prev, tmp; if (owner == NULL) return (0); for (prev = NULL, rbl = list_rbl; rbl; /* cstyle */) { if ((rbl->rpcb_map.r_prog != regp->r_prog) || (rbl->rpcb_map.r_vers != regp->r_vers) || (regp->r_netid[0] && strcasecmp(regp->r_netid, rbl->rpcb_map.r_netid))) { /* both rbl & prev move forwards */ prev = rbl; rbl = rbl->rpcb_next; continue; } /* * Check whether appropriate uid. Unset only * if superuser or the owner itself. 
*/ if (strcmp(owner, "superuser") && strcmp(rbl->rpcb_map.r_owner, owner)) return (0); /* found it; rbl moves forward, prev stays */ ans = 1; tmp = rbl; rbl = rbl->rpcb_next; if (prev == NULL) list_rbl = rbl; else prev->rpcb_next = rbl; free(tmp->rpcb_map.r_addr); free(tmp->rpcb_map.r_netid); free(tmp->rpcb_map.r_owner); free(tmp); } #ifdef PORTMAP if (ans) (void) del_pmaplist(regp); #endif /* * We return 1 either when the entry was not there or it * was able to unset it. It can come to this point only if * atleast one of the conditions is true. */ return (1); } void delete_prog(unsigned int prog) { RPCB reg; register rpcblist_ptr rbl; for (rbl = list_rbl; rbl != NULL; rbl = rbl->rpcb_next) { if ((rbl->rpcb_map.r_prog != prog)) continue; if (is_bound(rbl->rpcb_map.r_netid, rbl->rpcb_map.r_addr)) continue; reg.r_prog = rbl->rpcb_map.r_prog; reg.r_vers = rbl->rpcb_map.r_vers; reg.r_netid = strdup(rbl->rpcb_map.r_netid); (void) map_unset(®, "superuser"); free(reg.r_netid); } } void * rpcbproc_getaddr_com(RPCB *regp, struct svc_req *rqstp __unused, SVCXPRT *transp, rpcvers_t rpcbversnum, rpcvers_t verstype) { static char *uaddr; char *saddr = NULL; rpcblist_ptr fnd; if (uaddr != NULL && uaddr != nullstring) { free(uaddr); uaddr = NULL; } fnd = find_service(regp->r_prog, regp->r_vers, transp->xp_netid); if (fnd && ((verstype == RPCB_ALLVERS) || (regp->r_vers == fnd->rpcb_map.r_vers))) { if (*(regp->r_addr) != '\0') { /* may contain a hint about */ saddr = regp->r_addr; /* the interface that we */ } /* should use */ if (!(uaddr = mergeaddr(transp, transp->xp_netid, fnd->rpcb_map.r_addr, saddr))) { /* Try whatever we have */ uaddr = strdup(fnd->rpcb_map.r_addr); } else if (!uaddr[0]) { /* * The server died. Unset all versions of this prog. 
*/ delete_prog(regp->r_prog); uaddr = nullstring; } } else { uaddr = nullstring; } #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "getaddr: %s\n", uaddr); #endif /* XXX: should have used some defined constant here */ rpcbs_getaddr(rpcbversnum - 2, regp->r_prog, regp->r_vers, transp->xp_netid, uaddr); return (void *)&uaddr; } /* ARGSUSED */ void * rpcbproc_gettime_com(void *arg __unused, struct svc_req *rqstp __unused, SVCXPRT *transp __unused, rpcvers_t rpcbversnum __unused) { static time_t curtime; (void) time(&curtime); return (void *)&curtime; } /* * Convert uaddr to taddr. Should be used only by * local servers/clients. (kernel level stuff only) */ /* ARGSUSED */ void * rpcbproc_uaddr2taddr_com(void *arg, struct svc_req *rqstp __unused, SVCXPRT *transp, rpcvers_t rpcbversnum __unused) { char **uaddrp = (char **)arg; struct netconfig *nconf; static struct netbuf nbuf; static struct netbuf *taddr; if (taddr) { free(taddr->buf); free(taddr); taddr = NULL; } if (((nconf = rpcbind_get_conf(transp->xp_netid)) == NULL) || ((taddr = uaddr2taddr(nconf, *uaddrp)) == NULL)) { (void) memset((char *)&nbuf, 0, sizeof (struct netbuf)); return (void *)&nbuf; } return (void *)taddr; } /* * Convert taddr to uaddr. Should be used only by * local servers/clients. 
(kernel level stuff only) */ /* ARGSUSED */ void * rpcbproc_taddr2uaddr_com(void *arg, struct svc_req *rqstp __unused, SVCXPRT *transp, rpcvers_t rpcbversnum __unused) { struct netbuf *taddr = (struct netbuf *)arg; static char *uaddr; struct netconfig *nconf; #ifdef CHEW_FDS int fd; if ((fd = open("/dev/null", O_RDONLY)) == -1) { uaddr = (char *)strerror(errno); return (&uaddr); } #endif /* CHEW_FDS */ if (uaddr != NULL && uaddr != nullstring) { free(uaddr); uaddr = NULL; } if (((nconf = rpcbind_get_conf(transp->xp_netid)) == NULL) || ((uaddr = taddr2uaddr(nconf, taddr)) == NULL)) { uaddr = nullstring; } return (void *)&uaddr; } static bool_t xdr_encap_parms(XDR *xdrs, struct encap_parms *epp) { return (xdr_bytes(xdrs, &(epp->args), (u_int *) &(epp->arglen), ~0)); } /* * XDR remote call arguments. It ignores the address part. * written for XDR_DECODE direction only */ static bool_t xdr_rmtcall_args(XDR *xdrs, struct r_rmtcall_args *cap) { /* does not get the address or the arguments */ if (xdr_u_int32_t(xdrs, &(cap->rmt_prog)) && xdr_u_int32_t(xdrs, &(cap->rmt_vers)) && xdr_u_int32_t(xdrs, &(cap->rmt_proc))) { return (xdr_encap_parms(xdrs, &(cap->rmt_args))); } return (FALSE); } /* * XDR remote call results along with the address. Ignore * program number, version number and proc number. * Written for XDR_ENCODE direction only. 
*/ static bool_t xdr_rmtcall_result(XDR *xdrs, struct r_rmtcall_args *cap) { bool_t result; #ifdef PORTMAP if (cap->rmt_localvers == PMAPVERS) { int h1, h2, h3, h4, p1, p2; u_long port; /* interpret the universal address for TCP/IP */ if (sscanf(cap->rmt_uaddr, "%d.%d.%d.%d.%d.%d", &h1, &h2, &h3, &h4, &p1, &p2) != 6) return (FALSE); port = ((p1 & 0xff) << 8) + (p2 & 0xff); result = xdr_u_long(xdrs, &port); } else #endif if ((cap->rmt_localvers == RPCBVERS) || (cap->rmt_localvers == RPCBVERS4)) { result = xdr_wrapstring(xdrs, &(cap->rmt_uaddr)); } else { return (FALSE); } if (result == TRUE) return (xdr_encap_parms(xdrs, &(cap->rmt_args))); return (FALSE); } /* * only worries about the struct encap_parms part of struct r_rmtcall_args. * The arglen must already be set!! */ static bool_t xdr_opaque_parms(XDR *xdrs, struct r_rmtcall_args *cap) { return (xdr_opaque(xdrs, cap->rmt_args.args, cap->rmt_args.arglen)); } static struct rmtcallfd_list *rmthead; static struct rmtcallfd_list *rmttail; int create_rmtcall_fd(struct netconfig *nconf) { int fd; struct rmtcallfd_list *rmt; SVCXPRT *xprt; if ((fd = __rpc_nconf2fd(nconf)) == -1) { if (debugging) fprintf(stderr, "create_rmtcall_fd: couldn't open \"%s\" (errno %d)\n", nconf->nc_device, errno); return (-1); } xprt = svc_tli_create(fd, 0, (struct t_bind *) 0, 0, 0); if (xprt == NULL) { if (debugging) fprintf(stderr, "create_rmtcall_fd: svc_tli_create failed\n"); return (-1); } rmt = malloc(sizeof (struct rmtcallfd_list)); if (rmt == NULL) { syslog(LOG_ERR, "create_rmtcall_fd: no memory!"); return (-1); } rmt->xprt = xprt; rmt->netid = strdup(nconf->nc_netid); xprt->xp_netid = rmt->netid; rmt->fd = fd; rmt->next = NULL; if (rmthead == NULL) { rmthead = rmt; rmttail = rmt; } else { rmttail->next = rmt; rmttail = rmt; } /* XXX not threadsafe */ if (fd > svc_maxfd) svc_maxfd = fd; FD_SET(fd, &svc_fdset); return (fd); } static int find_rmtcallfd_by_netid(char *netid) { struct rmtcallfd_list *rmt; for (rmt = rmthead; rmt != 
NULL; rmt = rmt->next) { if (strcmp(netid, rmt->netid) == 0) { return (rmt->fd); } } return (-1); } static SVCXPRT * find_rmtcallxprt_by_fd(int fd) { struct rmtcallfd_list *rmt; for (rmt = rmthead; rmt != NULL; rmt = rmt->next) { if (fd == rmt->fd) { return (rmt->xprt); } } return (NULL); } /* * Call a remote procedure service. This procedure is very quiet when things * go wrong. The proc is written to support broadcast rpc. In the broadcast * case, a machine should shut-up instead of complain, lest the requestor be * overrun with complaints at the expense of not hearing a valid reply. * When receiving a request and verifying that the service exists, we * * receive the request * * open a new TLI endpoint on the same transport on which we received * the original request * * remember the original request's XID (which requires knowing the format * of the svc_dg_data structure) * * forward the request, with a new XID, to the requested service, * remembering the XID used to send this request (for later use in * reassociating the answer with the original request), the requestor's * address, the file descriptor on which the forwarded request is * made and the service's address. * * mark the file descriptor on which we anticipate receiving a reply from * the service and one to select for in our private svc_run procedure * * At some time in the future, a reply will be received from the service to * which we forwarded the request. At that time, we detect that the socket * used was for forwarding (by looking through the finfo structures to see * whether the fd corresponds to one of those) and call handle_reply() to * * receive the reply * * bundle the reply, along with the service's universal address * * create a SVCXPRT structure and use a version of svc_sendreply * that allows us to specify the reply XID and destination, send the reply * to the original requestor. 
*/ void rpcbproc_callit_com(struct svc_req *rqstp, SVCXPRT *transp, rpcproc_t reply_type, rpcvers_t versnum) { register rpcblist_ptr rbl; struct netconfig *nconf; struct netbuf *caller; struct r_rmtcall_args a; char *buf_alloc = NULL, *outbufp; char *outbuf_alloc = NULL; char buf[RPC_BUF_MAX], outbuf[RPC_BUF_MAX]; struct netbuf *na = (struct netbuf *) NULL; struct rpc_msg call_msg; int outlen; u_int sendsz; XDR outxdr; AUTH *auth; int fd = -1; char *uaddr, *m_uaddr = NULL, *local_uaddr = NULL; u_int32_t *xidp; struct __rpc_sockinfo si; struct sockaddr *localsa; struct netbuf tbuf; if (!__rpc_fd2sockinfo(transp->xp_fd, &si)) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); return; } if (si.si_socktype != SOCK_DGRAM) return; /* Only datagram type accepted */ sendsz = __rpc_get_t_size(si.si_af, si.si_proto, UDPMSGSIZE); if (sendsz == 0) { /* data transfer not supported */ if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); return; } /* * Should be multiple of 4 for XDR. */ sendsz = ((sendsz + 3) / 4) * 4; if (sendsz > RPC_BUF_MAX) { #ifdef notyet buf_alloc = alloca(sendsz); /* not in IDR2? */ #else buf_alloc = malloc(sendsz); #endif /* notyet */ if (buf_alloc == NULL) { if (debugging) fprintf(stderr, "rpcbproc_callit_com: No Memory!\n"); if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); return; } a.rmt_args.args = buf_alloc; } else { a.rmt_args.args = buf; } call_msg.rm_xid = 0; /* For error checking purposes */ if (!svc_getargs(transp, (xdrproc_t) xdr_rmtcall_args, (char *) &a)) { if (reply_type == RPCBPROC_INDIRECT) svcerr_decode(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: svc_getargs failed\n"); goto error; } if (!check_callit(transp, &a, versnum)) { svcerr_weakauth(transp); goto error; } caller = svc_getrpccaller(transp); #ifdef RPCBIND_DEBUG if (debugging) { uaddr = taddr2uaddr(rpcbind_get_conf(transp->xp_netid), caller); fprintf(stderr, "%s %s req for (%lu, %lu, %lu, %s) from %s : ", versnum == PMAPVERS ? 
"pmap_rmtcall" : versnum == RPCBVERS ? "rpcb_rmtcall" : versnum == RPCBVERS4 ? "rpcb_indirect" : "unknown", reply_type == RPCBPROC_INDIRECT ? "indirect" : "callit", (unsigned long)a.rmt_prog, (unsigned long)a.rmt_vers, (unsigned long)a.rmt_proc, transp->xp_netid, uaddr ? uaddr : "unknown"); if (uaddr) free(uaddr); } #endif rbl = find_service(a.rmt_prog, a.rmt_vers, transp->xp_netid); rpcbs_rmtcall(versnum - 2, reply_type, a.rmt_prog, a.rmt_vers, a.rmt_proc, transp->xp_netid, rbl); if (rbl == (rpcblist_ptr)NULL) { #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "not found\n"); #endif if (reply_type == RPCBPROC_INDIRECT) svcerr_noprog(transp); goto error; } if (rbl->rpcb_map.r_vers != a.rmt_vers) { if (reply_type == RPCBPROC_INDIRECT) { rpcvers_t vers_low, vers_high; find_versions(a.rmt_prog, transp->xp_netid, &vers_low, &vers_high); svcerr_progvers(transp, vers_low, vers_high); } goto error; } #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "found at uaddr %s\n", rbl->rpcb_map.r_addr); #endif /* * Check whether this entry is valid and a server is present * Mergeaddr() returns NULL if no such entry is present, and * returns "" if the entry was present but the server is not * present (i.e., it crashed). 
*/ if (reply_type == RPCBPROC_INDIRECT) { uaddr = mergeaddr(transp, transp->xp_netid, rbl->rpcb_map.r_addr, NULL); if (uaddr == NULL || uaddr[0] == '\0') { svcerr_noprog(transp); if (uaddr != NULL) free(uaddr); goto error; } free(uaddr); } nconf = rpcbind_get_conf(transp->xp_netid); if (nconf == (struct netconfig *)NULL) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: rpcbind_get_conf failed\n"); goto error; } localsa = local_sa(((struct sockaddr *)caller->buf)->sa_family); if (localsa == NULL) { if (debugging) fprintf(stderr, "rpcbproc_callit_com: no local address\n"); goto error; } tbuf.len = tbuf.maxlen = localsa->sa_len; tbuf.buf = localsa; local_uaddr = addrmerge(&tbuf, rbl->rpcb_map.r_addr, NULL, nconf->nc_netid); m_uaddr = addrmerge(caller, rbl->rpcb_map.r_addr, NULL, nconf->nc_netid); #ifdef RPCBIND_DEBUG if (debugging) fprintf(stderr, "merged uaddr %s\n", m_uaddr); #endif if ((fd = find_rmtcallfd_by_netid(nconf->nc_netid)) == -1) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); goto error; } xidp = __rpcb_get_dg_xidp(transp); switch (forward_register(*xidp, caller, fd, m_uaddr, reply_type, versnum, &call_msg.rm_xid)) { case 1: /* Success; forward_register() will free m_uaddr for us. */ m_uaddr = NULL; break; case 0: /* * A duplicate request for the slow server. Let's not * beat on it any more. */ if (debugging) fprintf(stderr, "rpcbproc_callit_com: duplicate request\n"); goto error; case -1: /* forward_register failed. Perhaps no memory. 
*/ if (debugging) fprintf(stderr, "rpcbproc_callit_com: forward_register failed\n"); goto error; } #ifdef DEBUG_RMTCALL if (debugging) fprintf(stderr, "rpcbproc_callit_com: original XID %x, new XID %x\n", *xidp, call_msg.rm_xid); #endif call_msg.rm_direction = CALL; call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; call_msg.rm_call.cb_prog = a.rmt_prog; call_msg.rm_call.cb_vers = a.rmt_vers; if (sendsz > RPC_BUF_MAX) { #ifdef notyet outbuf_alloc = alloca(sendsz); /* not in IDR2? */ #else outbuf_alloc = malloc(sendsz); #endif /* notyet */ if (outbuf_alloc == NULL) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: No memory!\n"); goto error; } xdrmem_create(&outxdr, outbuf_alloc, sendsz, XDR_ENCODE); } else { xdrmem_create(&outxdr, outbuf, sendsz, XDR_ENCODE); } if (!xdr_callhdr(&outxdr, &call_msg)) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: xdr_callhdr failed\n"); goto error; } if (!xdr_u_int32_t(&outxdr, &(a.rmt_proc))) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: xdr_u_long failed\n"); goto error; } if (rqstp->rq_cred.oa_flavor == AUTH_NULL) { auth = authnone_create(); } else if (rqstp->rq_cred.oa_flavor == AUTH_SYS) { struct authunix_parms *au; au = (struct authunix_parms *)rqstp->rq_clntcred; auth = authunix_create(au->aup_machname, au->aup_uid, au->aup_gid, au->aup_len, au->aup_gids); if (auth == NULL) /* fall back */ auth = authnone_create(); } else { /* we do not support any other authentication scheme */ if (debugging) fprintf(stderr, "rpcbproc_callit_com: oa_flavor != AUTH_NONE and oa_flavor != AUTH_SYS\n"); if (reply_type == RPCBPROC_INDIRECT) svcerr_weakauth(transp); /* XXX too strong.. 
*/ goto error; } if (auth == NULL) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: authwhatever_create returned NULL\n"); goto error; } if (!AUTH_MARSHALL(auth, &outxdr)) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); AUTH_DESTROY(auth); if (debugging) fprintf(stderr, "rpcbproc_callit_com: AUTH_MARSHALL failed\n"); goto error; } AUTH_DESTROY(auth); if (!xdr_opaque_parms(&outxdr, &a)) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); if (debugging) fprintf(stderr, "rpcbproc_callit_com: xdr_opaque_parms failed\n"); goto error; } outlen = (int) XDR_GETPOS(&outxdr); if (outbuf_alloc) outbufp = outbuf_alloc; else outbufp = outbuf; na = uaddr2taddr(nconf, local_uaddr); if (!na) { if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); goto error; } if (sendto(fd, outbufp, outlen, 0, (struct sockaddr *)na->buf, na->len) != outlen) { if (debugging) fprintf(stderr, "rpcbproc_callit_com: sendto failed: errno %d\n", errno); if (reply_type == RPCBPROC_INDIRECT) svcerr_systemerr(transp); goto error; } goto out; error: if (call_msg.rm_xid != 0) (void) free_slot_by_xid(call_msg.rm_xid); out: if (local_uaddr) free(local_uaddr); if (buf_alloc) free(buf_alloc); if (outbuf_alloc) free(outbuf_alloc); if (na) { free(na->buf); free(na); } if (m_uaddr != NULL) free(m_uaddr); } /* * Makes an entry into the FIFO for the given request. * Returns 1 on success, 0 if this is a duplicate request, or -1 on error. * *callxidp is set to the xid of the call. */ static int forward_register(u_int32_t caller_xid, struct netbuf *caller_addr, int forward_fd, char *uaddr, rpcproc_t reply_type, rpcvers_t versnum, u_int32_t *callxidp) { int i; int j = 0; time_t min_time, time_now; static u_int32_t lastxid; int entry = -1; min_time = FINFO[0].time; time_now = time((time_t *)0); /* initialization */ if (lastxid == 0) lastxid = time_now * NFORWARD; /* * Check if it is a duplicate entry. 
Then, * try to find an empty slot. If not available, then * use the slot with the earliest time. */ for (i = 0; i < NFORWARD; i++) { if (FINFO[i].flag & FINFO_ACTIVE) { if ((FINFO[i].caller_xid == caller_xid) && (FINFO[i].reply_type == reply_type) && (FINFO[i].versnum == versnum) && (!netbufcmp(FINFO[i].caller_addr, caller_addr))) { FINFO[i].time = time((time_t *)0); return (0); /* Duplicate entry */ } else { /* Should we wait any longer */ if ((time_now - FINFO[i].time) > MAXTIME_OFF) (void) free_slot_by_index(i); } } if (entry == -1) { if ((FINFO[i].flag & FINFO_ACTIVE) == 0) { entry = i; } else if (FINFO[i].time < min_time) { j = i; min_time = FINFO[i].time; } } } if (entry != -1) { /* use this empty slot */ j = entry; } else { (void) free_slot_by_index(j); } if ((FINFO[j].caller_addr = netbufdup(caller_addr)) == NULL) { return (-1); } rpcb_rmtcalls++; /* no of pending calls */ FINFO[j].flag = FINFO_ACTIVE; FINFO[j].reply_type = reply_type; FINFO[j].versnum = versnum; FINFO[j].time = time_now; FINFO[j].caller_xid = caller_xid; FINFO[j].forward_fd = forward_fd; /* * Though uaddr is not allocated here, it will still be freed * from free_slot_*(). */ FINFO[j].uaddr = uaddr; lastxid = lastxid + NFORWARD; /* Don't allow a zero xid below. 
*/ if ((u_int32_t)(lastxid + NFORWARD) <= NFORWARD) lastxid = NFORWARD; FINFO[j].forward_xid = lastxid + j; /* encode slot */ *callxidp = FINFO[j].forward_xid; /* forward on this xid */ return (1); } static struct finfo * forward_find(u_int32_t reply_xid) { int i; i = reply_xid % (u_int32_t)NFORWARD; if ((FINFO[i].flag & FINFO_ACTIVE) && (FINFO[i].forward_xid == reply_xid)) { return (&FINFO[i]); } return (NULL); } static int free_slot_by_xid(u_int32_t xid) { int entry; entry = xid % (u_int32_t)NFORWARD; return (free_slot_by_index(entry)); } static int free_slot_by_index(int index) { struct finfo *fi; fi = &FINFO[index]; if (fi->flag & FINFO_ACTIVE) { netbuffree(fi->caller_addr); /* XXX may be too big, but can't access xprt array here */ if (fi->forward_fd >= svc_maxfd) svc_maxfd--; free(fi->uaddr); fi->flag &= ~FINFO_ACTIVE; rpcb_rmtcalls--; return (1); } return (0); } static int netbufcmp(struct netbuf *n1, struct netbuf *n2) { return ((n1->len != n2->len) || memcmp(n1->buf, n2->buf, n1->len)); } static bool_t netbuf_copybuf(struct netbuf *dst, const struct netbuf *src) { + assert(src->len <= src->maxlen); - if (dst->len != src->len || dst->buf == NULL) { + if (dst->maxlen < src->len || dst->buf == NULL) { if (dst->buf != NULL) free(dst->buf); - if ((dst->buf = malloc(src->len)) == NULL) + if ((dst->buf = calloc(1, src->maxlen)) == NULL) return (FALSE); - - dst->maxlen = dst->len = src->len; + dst->maxlen = src->maxlen; } + dst->len = src->len; memcpy(dst->buf, src->buf, src->len); + return (TRUE); } static struct netbuf * netbufdup(struct netbuf *ap) { struct netbuf *np; if ((np = calloc(1, sizeof(struct netbuf))) == NULL) return (NULL); if (netbuf_copybuf(np, ap) == FALSE) { free(np); return (NULL); } return (np); } static void netbuffree(struct netbuf *ap) { free(ap->buf); ap->buf = NULL; free(ap); } #define MASKVAL (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND) extern bool_t __svc_clean_idle(fd_set *, int, bool_t); void my_svc_run(void) { size_t nfds; struct 
pollfd pollfds[FD_SETSIZE]; int poll_ret, check_ret; int n; #ifdef SVC_RUN_DEBUG int i; #endif register struct pollfd *p; fd_set cleanfds; for (;;) { p = pollfds; for (n = 0; n <= svc_maxfd; n++) { if (FD_ISSET(n, &svc_fdset)) { p->fd = n; p->events = MASKVAL; p++; } } nfds = p - pollfds; poll_ret = 0; #ifdef SVC_RUN_DEBUG if (debugging) { fprintf(stderr, "polling for read on fd < "); for (i = 0, p = pollfds; i < nfds; i++, p++) if (p->events) fprintf(stderr, "%d ", p->fd); fprintf(stderr, ">\n"); } #endif switch (poll_ret = poll(pollfds, nfds, 30 * 1000)) { case -1: /* * We ignore all errors, continuing with the assumption * that it was set by the signal handlers (or any * other outside event) and not caused by poll(). */ case 0: cleanfds = svc_fdset; __svc_clean_idle(&cleanfds, 30, FALSE); continue; default: #ifdef SVC_RUN_DEBUG if (debugging) { fprintf(stderr, "poll returned read fds < "); for (i = 0, p = pollfds; i < nfds; i++, p++) if (p->revents) fprintf(stderr, "%d ", p->fd); fprintf(stderr, ">\n"); } #endif /* * If we found as many replies on callback fds * as the number of descriptors selectable which * poll() returned, there can be no more so we * don't call svc_getreq_poll. Otherwise, there * must be another so we must call svc_getreq_poll. 
*/ if ((check_ret = check_rmtcalls(pollfds, nfds)) == poll_ret) continue; svc_getreq_poll(pollfds, poll_ret-check_ret); } #ifdef SVC_RUN_DEBUG if (debugging) { fprintf(stderr, "svc_maxfd now %u\n", svc_maxfd); } #endif } } static int check_rmtcalls(struct pollfd *pfds, int nfds) { int j, ncallbacks_found = 0, rmtcalls_pending; SVCXPRT *xprt; if (rpcb_rmtcalls == 0) return (0); rmtcalls_pending = rpcb_rmtcalls; for (j = 0; j < nfds; j++) { if ((xprt = find_rmtcallxprt_by_fd(pfds[j].fd)) != NULL) { if (pfds[j].revents) { ncallbacks_found++; #ifdef DEBUG_RMTCALL if (debugging) fprintf(stderr, "my_svc_run: polled on forwarding fd %d, netid %s - calling handle_reply\n", pfds[j].fd, xprt->xp_netid); #endif handle_reply(pfds[j].fd, xprt); pfds[j].revents = 0; if (ncallbacks_found >= rmtcalls_pending) { break; } } } } return (ncallbacks_found); } static void xprt_set_caller(SVCXPRT *xprt, struct finfo *fi) { u_int32_t *xidp; netbuf_copybuf(svc_getrpccaller(xprt), fi->caller_addr); xidp = __rpcb_get_dg_xidp(xprt); *xidp = fi->caller_xid; } /* * Call svcerr_systemerr() only if RPCBVERS4 */ static void send_svcsyserr(SVCXPRT *xprt, struct finfo *fi) { if (fi->reply_type == RPCBPROC_INDIRECT) { xprt_set_caller(xprt, fi); svcerr_systemerr(xprt); } return; } static void handle_reply(int fd, SVCXPRT *xprt) { XDR reply_xdrs; struct rpc_msg reply_msg; struct rpc_err reply_error; char *buffer; struct finfo *fi; int inlen, pos, len; struct r_rmtcall_args a; struct sockaddr_storage ss; socklen_t fromlen; #ifdef SVC_RUN_DEBUG char *uaddr; #endif buffer = malloc(RPC_BUF_MAX); if (buffer == NULL) goto done; do { fromlen = sizeof(ss); inlen = recvfrom(fd, buffer, RPC_BUF_MAX, 0, (struct sockaddr *)&ss, &fromlen); } while (inlen < 0 && errno == EINTR); if (inlen < 0) { if (debugging) fprintf(stderr, "handle_reply: recvfrom returned %d, errno %d\n", inlen, errno); goto done; } reply_msg.acpted_rply.ar_verf = _null_auth; reply_msg.acpted_rply.ar_results.where = 0; 
reply_msg.acpted_rply.ar_results.proc = (xdrproc_t) xdr_void; xdrmem_create(&reply_xdrs, buffer, (u_int)inlen, XDR_DECODE); if (!xdr_replymsg(&reply_xdrs, &reply_msg)) { if (debugging) (void) fprintf(stderr, "handle_reply: xdr_replymsg failed\n"); goto done; } fi = forward_find(reply_msg.rm_xid); #ifdef SVC_RUN_DEBUG if (debugging) { fprintf(stderr, "handle_reply: reply xid: %d fi addr: %p\n", reply_msg.rm_xid, fi); } #endif if (fi == NULL) { goto done; } _seterr_reply(&reply_msg, &reply_error); if (reply_error.re_status != RPC_SUCCESS) { if (debugging) (void) fprintf(stderr, "handle_reply: %s\n", clnt_sperrno(reply_error.re_status)); send_svcsyserr(xprt, fi); goto done; } pos = XDR_GETPOS(&reply_xdrs); len = inlen - pos; a.rmt_args.args = &buffer[pos]; a.rmt_args.arglen = len; a.rmt_uaddr = fi->uaddr; a.rmt_localvers = fi->versnum; xprt_set_caller(xprt, fi); #ifdef SVC_RUN_DEBUG uaddr = taddr2uaddr(rpcbind_get_conf("udp"), svc_getrpccaller(xprt)); if (debugging) { fprintf(stderr, "handle_reply: forwarding address %s to %s\n", a.rmt_uaddr, uaddr ? 
uaddr : "unknown"); } if (uaddr) free(uaddr); #endif svc_sendreply(xprt, (xdrproc_t) xdr_rmtcall_result, (char *) &a); done: if (buffer) free(buffer); if (reply_msg.rm_xid == 0) { #ifdef SVC_RUN_DEBUG if (debugging) { fprintf(stderr, "handle_reply: NULL xid on exit!\n"); } #endif } else (void) free_slot_by_xid(reply_msg.rm_xid); return; } static void find_versions(rpcprog_t prog, char *netid, rpcvers_t *lowvp, rpcvers_t *highvp) { register rpcblist_ptr rbl; unsigned int lowv = 0; unsigned int highv = 0; for (rbl = list_rbl; rbl != NULL; rbl = rbl->rpcb_next) { if ((rbl->rpcb_map.r_prog != prog) || ((rbl->rpcb_map.r_netid != NULL) && (strcasecmp(rbl->rpcb_map.r_netid, netid) != 0))) continue; if (lowv == 0) { highv = rbl->rpcb_map.r_vers; lowv = highv; } else if (rbl->rpcb_map.r_vers < lowv) { lowv = rbl->rpcb_map.r_vers; } else if (rbl->rpcb_map.r_vers > highv) { highv = rbl->rpcb_map.r_vers; } } *lowvp = lowv; *highvp = highv; return; } /* * returns the item with the given program, version number and netid. * If that version number is not found, it returns the item with that * program number, so that address is now returned to the caller. The * caller when makes a call to this program, version number, the call * will fail and it will return with PROGVERS_MISMATCH. The user can * then determine the highest and the lowest version number for this * program using clnt_geterr() and use those program version numbers. * * Returns the RPCBLIST for the given prog, vers and netid */ static rpcblist_ptr find_service(rpcprog_t prog, rpcvers_t vers, char *netid) { register rpcblist_ptr hit = NULL; register rpcblist_ptr rbl; for (rbl = list_rbl; rbl != NULL; rbl = rbl->rpcb_next) { if ((rbl->rpcb_map.r_prog != prog) || ((rbl->rpcb_map.r_netid != NULL) && (strcasecmp(rbl->rpcb_map.r_netid, netid) != 0))) continue; hit = rbl; if (rbl->rpcb_map.r_vers == vers) break; } return (hit); } /* * Copies the name associated with the uid of the caller and returns * a pointer to it. 
Similar to getwd(). */ static char * getowner(SVCXPRT *transp, char *owner, size_t ownersize) { uid_t uid; if (__rpc_get_local_uid(transp, &uid) < 0) strlcpy(owner, "unknown", ownersize); else if (uid == 0) strlcpy(owner, "superuser", ownersize); else snprintf(owner, ownersize, "%d", uid); return owner; } #ifdef PORTMAP /* * Add this to the pmap list only if it is UDP or TCP. */ static int add_pmaplist(RPCB *arg) { struct pmap pmap; struct pmaplist *pml; int h1, h2, h3, h4, p1, p2; if (strcmp(arg->r_netid, udptrans) == 0) { /* It is UDP! */ pmap.pm_prot = IPPROTO_UDP; } else if (strcmp(arg->r_netid, tcptrans) == 0) { /* It is TCP */ pmap.pm_prot = IPPROTO_TCP; } else /* Not an IP protocol */ return (0); /* interpret the universal address for TCP/IP */ if (sscanf(arg->r_addr, "%d.%d.%d.%d.%d.%d", &h1, &h2, &h3, &h4, &p1, &p2) != 6) return (0); pmap.pm_port = ((p1 & 0xff) << 8) + (p2 & 0xff); pmap.pm_prog = arg->r_prog; pmap.pm_vers = arg->r_vers; /* * add to END of list */ pml = malloc(sizeof (struct pmaplist)); if (pml == NULL) { (void) syslog(LOG_ERR, "rpcbind: no memory!\n"); return (1); } pml->pml_map = pmap; pml->pml_next = NULL; if (list_pml == NULL) { list_pml = pml; } else { struct pmaplist *fnd; /* Attach to the end of the list */ for (fnd = list_pml; fnd->pml_next; fnd = fnd->pml_next) ; fnd->pml_next = pml; } return (0); } /* * Delete this from the pmap list only if it is UDP or TCP. */ static int del_pmaplist(RPCB *arg) { struct pmaplist *pml; struct pmaplist *prevpml, *fnd; unsigned long prot; if (strcmp(arg->r_netid, udptrans) == 0) { /* It is UDP! 
*/ prot = IPPROTO_UDP; } else if (strcmp(arg->r_netid, tcptrans) == 0) { /* It is TCP */ prot = IPPROTO_TCP; } else if (arg->r_netid[0] == 0) { prot = 0; /* Remove all occurrences */ } else { /* Not an IP protocol */ return (0); } for (prevpml = NULL, pml = list_pml; pml; /* cstyle */) { if ((pml->pml_map.pm_prog != arg->r_prog) || (pml->pml_map.pm_vers != arg->r_vers) || (prot && (pml->pml_map.pm_prot != prot))) { /* both pml & prevpml move forwards */ prevpml = pml; pml = pml->pml_next; continue; } /* found it; pml moves forward, prevpml stays */ fnd = pml; pml = pml->pml_next; if (prevpml == NULL) list_pml = pml; else prevpml->pml_next = pml; free(fnd); } return (0); } #endif /* PORTMAP */ Index: projects/clang370-import =================================================================== --- projects/clang370-import (revision 288925) +++ projects/clang370-import (revision 288926) Property changes on: projects/clang370-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,2 ## Merged /user/ngie/more-tests:r288828 Merged /head:r288836-288925