diff options
Diffstat (limited to 'arch/arm/mvp/pvtcpkm')
-rw-r--r-- | arch/arm/mvp/pvtcpkm/COPYING | 341 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/Kbuild | 9 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/Makefile | 1 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/check_kconfig.c | 91 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm.h | 171 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_os.h | 150 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_os_linux.c | 371 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_os_linux.h | 699 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_os_mod_linux.c | 105 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_os_mod_ver.h | 38 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_svc.h | 71 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/comm_transp.h | 90 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/include_check.h | 18 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp.c | 587 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp.h | 458 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp_off.c | 81 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp_off.h | 219 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp_off_io_linux.c | 831 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp_off_linux.c | 2858 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp_off_linux.h | 226 | ||||
-rw-r--r-- | arch/arm/mvp/pvtcpkm/pvtcp_off_linux_shim.S | 70 |
21 files changed, 0 insertions, 7485 deletions
diff --git a/arch/arm/mvp/pvtcpkm/COPYING b/arch/arm/mvp/pvtcpkm/COPYING deleted file mode 100644 index 10828e0..0000000 --- a/arch/arm/mvp/pvtcpkm/COPYING +++ /dev/null @@ -1,341 +0,0 @@ - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. 
And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. 
The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. 
(Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. 
- - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/arch/arm/mvp/pvtcpkm/Kbuild b/arch/arm/mvp/pvtcpkm/Kbuild deleted file mode 100644 index d2ec844..0000000 --- a/arch/arm/mvp/pvtcpkm/Kbuild +++ /dev/null @@ -1,9 +0,0 @@ -# Warning: autogenerated -obj-m := pvtcpkm.o -pvtcpkm-objs := check_kconfig.o pvtcp_off_io_linux.o pvtcp_off_linux.o comm_os_linux.o comm_os_mod_linux.o pvtcp.o pvtcp_off.o pvtcp_off_linux_shim.o - -ccflags-y += -fno-pic -fno-dwarf2-cfi-asm -march=armv7-a -D__linux__ -ccflags-y += -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -DPVTCP_BUILDING_SERVER -ccflags-y += -mfpu=neon -DIN_MODULE -DGPLED_CODE -ccflags-y += --std=gnu89 -O2 -g2 -ggdb -mapcs -fno-optimize-sibling-calls -mno-sched-prolog -ccflags-$(CONFIG_VMWARE_MVP_DEBUG) += -DMVP_DEBUG diff --git a/arch/arm/mvp/pvtcpkm/Makefile b/arch/arm/mvp/pvtcpkm/Makefile deleted file mode 100644 index 16eb389..0000000 --- a/arch/arm/mvp/pvtcpkm/Makefile +++ /dev/null @@ -1 +0,0 @@ -# Warning: autogenerated diff --git a/arch/arm/mvp/pvtcpkm/check_kconfig.c b/arch/arm/mvp/pvtcpkm/check_kconfig.c deleted file mode 100644 index 6fc27a1..0000000 --- a/arch/arm/mvp/pvtcpkm/check_kconfig.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * @brief Check for required kernel configuration - * - * Check to make sure that the kernel options that the MVP hypervisor requires - * have been enabled in the kernel that this kernel module is being built - * against. - */ -#include <linux/version.h> - -/* - * Minimum kernel version - * - network namespace support is only really functional starting in 2.6.29 - * - Android Gingerbread requires 2.6.35 - */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -#error "MVP requires a host kernel newer than 2.6.35" -#endif - -/* module loading ability */ -#ifndef CONFIG_MODULES -#error "MVP requires kernel loadable module support be enabled (CONFIG_MODULES)" -#endif -#ifndef CONFIG_MODULE_UNLOAD -#error "MVP requires kernel module unload support be enabled (CONFIG_MODULE_UNLOAD)" -#endif - -/* sysfs */ -#ifndef CONFIG_SYSFS -#error "MVP requires sysfs support (CONFIG_SYSFS)" -#endif - -/* network traffic isolation */ -#ifndef CONFIG_NAMESPACES -#error "MVP networking support requires namespace support (CONFIG_NAMESPACES)" -#endif -#ifndef CONFIG_NET_NS -#error "MVP networking support requires Network Namespace support to be enabled (CONFIG_NET_NS)" -#endif - -/* TCP/IP networking */ -#ifndef CONFIG_INET -#error "MVP networking requires IPv4 support (CONFIG_INET)" -#endif -#ifndef CONFIG_IPV6 -#error "MVP networking requires IPv6 support (CONFIG_IPV6)" -#endif - -/* VPN support */ -#if !defined(CONFIG_TUN) && !defined(CONFIG_TUN_MODULE) -#error "MVP VPN 
support requires TUN device support (CONFIG_TUN)" -#endif - -#if !defined(CONFIG_NETFILTER) && !defined(PVTCP_DISABLE_NETFILTER) -#error "MVP networking support requires netfilter support (CONFIG_NETFILTER)" -#endif - -/* Force /proc/config.gz support for eng/userdebug builds */ -#ifdef MVP_DEBUG -#if !defined(CONFIG_IKCONFIG) || !defined(CONFIG_IKCONFIG_PROC) -#error "MVP kernel /proc/config.gz support required for debuggability (CONFIG_IKCONFIG_PROC)" -#endif -#endif - -/* Sanity check we're only dealing with the memory hotplug + migrate and/or - * compaction combo */ -#ifdef CONFIG_MIGRATION -#if defined(CONFIG_NUMA) || defined(CONFIG_CPUSETS) || defined(CONFIG_MEMORY_FAILURE) -#error "MVP not tested with migration features other than CONFIG_MEMORY_HOTPLUG and CONFIG_COMPACTION" -#endif -#endif diff --git a/arch/arm/mvp/pvtcpkm/comm.h b/arch/arm/mvp/pvtcpkm/comm.h deleted file mode 100644 index 877731d..0000000 --- a/arch/arm/mvp/pvtcpkm/comm.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Communication functions based on queue pair transport APIs. 
- * - * Comm is a shared memory-based mechanism that facilitates the implementation - * of kernel components that require host-to-guest, or guest-to-guest - * communication. - * This facility assumes the availability of a minimal shared memory queue pair - * implementation, such as MVP queue pairs or VMCI queue pairs. The latter must - * provide primitives for queue pair creation and destruction, and reading and - * writing from/to queue pairs. - * Comm assumes that the queue pair (transport) layer is not concerned with - * multi-threading, locking or flow control, and does not require such features. - */ - -#ifndef _COMM_H_ -#define _COMM_H_ - -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_PV -#define INCLUDE_ALLOW_GPL -#include "include_check.h" - -#include "comm_os.h" -#include "comm_transp.h" - - -/* Default/maximum Comm timeouts (in milliseconds). */ -#define COMM_MAX_TO 60000ULL -#define COMM_MAX_TO_UNINT (COMM_MAX_TO + 1) - -#define COMM_OPF_SET_ERR(flags) ((flags) |= 128) -#define COMM_OPF_CLEAR_ERR(flags) ((flags) &= 127) -#define COMM_OPF_TEST_ERR(flags) ((flags) & 128) - -#define COMM_OPF_SET_VAL(flags, val) ((flags) |= ((val) & 127)) -#define COMM_OPF_GET_VAL(flags) ((flags) & 127) - -/** - * Packet (header) structure. - * NB: Do not change this structure, especially the first three fields; there - * will be consequences. It may be extended, but it's not recommended: all - * operations carry this header, so it's better kept in its minimal form. - */ - -typedef struct CommPacket { - unsigned int len; // Total length - unsigned char flags; // Operation flags - unsigned char opCode; // Operation to call - unsigned short data16; // Auxiliary data - unsigned long long data64; - unsigned long long data64ex; - union { - struct { - unsigned int data32; - unsigned int data32ex; - }; - unsigned long long data64ex2; - }; -} CommPacket; - - -/* Opaque structure representing a communication channel. 
*/ - -struct CommChannelPriv; -typedef struct CommChannelPriv *CommChannel; - - -/* Input operations associated with a comm channel. */ - -typedef void (*CommOperationFunc)(CommChannel channel, - void *state, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen); - - -/* Helper macros */ - -#define COMM_DEFINE_OP(funcName) \ -void \ -funcName(CommChannel channel, \ - void *state, \ - CommPacket *packet, \ - struct kvec *vec, \ - unsigned int vecLen) - - -/* Comm-based implementations. */ - -typedef struct CommImpl { - struct module *owner; - int (*checkArgs)(CommTranspInitArgs *transpArgs); - void *(*stateCtor)(CommChannel channel); - void (*stateDtor)(void *state); - void *(*dataAlloc)(unsigned int dataLen); - void (*dataFree)(void *data); - const CommOperationFunc *operations; - void (*closeNtf)(void *closeNtfData, - const CommTranspInitArgs *transpArgs, - int inBH); - void *closeNtfData; - void (*activateNtf)(void *activateNtfData, - CommChannel channel); - void *activateNtfData; - unsigned long long openAtMillis; - unsigned long long openTimeoutAtMillis; - CommTranspID ntfCenterID; -} CommImpl; - - -int Comm_Init(unsigned int maxChannels); -int Comm_Finish(unsigned long long *timeoutMillis); -int Comm_RegisterImpl(const CommImpl *impl); -void Comm_UnregisterImpl(const CommImpl *impl); -int Comm_IsActive(CommChannel channel); -CommTranspInitArgs Comm_GetTranspInitArgs(CommChannel channel); -void *Comm_GetState(CommChannel channel); -int Comm_Dispatch(CommChannel channel); -unsigned int Comm_DispatchAll(void); -void Comm_Put(CommChannel channel); -void Comm_DispatchUnlock(CommChannel channel); -int Comm_Lock(CommChannel channel); -void Comm_Unlock(CommChannel channel); -int Comm_Zombify(CommChannel channel, int inBH); - -int -Comm_Alloc(const CommTranspInitArgs *transpArgs, - const CommImpl *impl, - int inBH, - CommChannel *newChannel); - - -int -Comm_Write(CommChannel channel, - const CommPacket *packet, - unsigned long long *timeoutMillis); - -int 
-Comm_WriteVec(CommChannel channel, - const CommPacket *packet, - struct kvec **vec, - unsigned int *vecLen, - unsigned long long *timeoutMillis, - unsigned int *iovOffset); - -unsigned int Comm_RequestInlineEvents(CommChannel channel); -unsigned int Comm_ReleaseInlineEvents(CommChannel channel); - -#endif // _COMM_H_ diff --git a/arch/arm/mvp/pvtcpkm/comm_os.h b/arch/arm/mvp/pvtcpkm/comm_os.h deleted file mode 100644 index 91305f1..0000000 --- a/arch/arm/mvp/pvtcpkm/comm_os.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Cross-platform base type definitions and function declarations. - * Includes OS-specific base type definitions and function declarations. - */ - -#ifndef _COMM_OS_H_ -#define _COMM_OS_H_ - -/* For-ever timeout constant (in milliseconds). */ -#define COMM_OS_4EVER_TO ((unsigned long long)(~0UL >> 1)) - -/* Condition function prototype. Returns 1: true, 0: false, < 0: error code. */ -typedef int (*CommOSWaitConditionFunc)(void *arg1, void *arg2); - -/* Dispatch function prototype. Called by input (dispatch) kernel threads. 
*/
-typedef unsigned int (*CommOSDispatchFunc)(void);
-
-/* Module initialization and exit callback functions. */
-extern int (*commOSModInit)(void *args);
-extern void (*commOSModExit)(void);
-
-/*
- * Macro to assign Init and Exit callbacks. It expands to the definitions
- * (with initializers) of the two function pointers declared above.
- */
-#define COMM_OS_MOD_INIT(init, exit) \
-   int (*commOSModInit)(void *args) = init; \
-   void (*commOSModExit)(void) = exit
-
-
-/*
- * OS-specific implementations must provide the following:
- * 1. Types:
- *    CommOSAtomic
- *    CommOSSpinlock
- *    CommOSMutex
- *    CommOSWaitQueue
- *    CommOSWork
- *    CommOSWorkFunc
- *    CommOSList
- *    CommOSModule
- *    struct kvec
- *
- * 2. Definition, initializers:
- *    CommOSSpinlock_Define()
- *
- * 3. Functions:
- *    void CommOS_Debug(const char *format, ...);
- *    void CommOS_Log(const char *format, ...);
- *    void CommOS_WriteAtomic(CommOSAtomic *atomic, int val);
- *    int CommOS_ReadAtomic(CommOSAtomic *atomic);
- *    int CommOS_AddReturnAtomic(CommOSAtomic *atomic, int val);
- *    int CommOS_SubReturnAtomic(CommOSAtomic *atomic, int val);
- *    void CommOS_SpinlockInit(CommOSSpinlock *lock);
- *    void CommOS_SpinLockBH(CommOSSpinlock *lock);
- *    int CommOS_SpinTrylockBH(CommOSSpinlock *lock);
- *    void CommOS_SpinUnlockBH(CommOSSpinlock *lock);
- *    void CommOS_SpinLock(CommOSSpinlock *lock);
- *    int CommOS_SpinTrylock(CommOSSpinlock *lock);
- *    void CommOS_SpinUnlock(CommOSSpinlock *lock);
- *    void CommOS_MutexInit(CommOSMutex *mutex);
- *    int CommOS_MutexLock(CommOSMutex *mutex);
- *    void CommOS_MutexLockUninterruptible(CommOSMutex *mutex);
- *    int CommOS_MutexTrylock(CommOSMutex *mutex);
- *    void CommOS_MutexUnlock(CommOSMutex *mutex);
- *    void CommOS_WaitQueueInit(CommOSWaitQueue *wq);
- *    int CommOS_DoWait(CommOSWaitQueue *wq,
- *                      CommOSWaitConditionFunc cond,
- *                      void *condArg1,
- *                      void *condArg2,
- *                      unsigned long long *timeoutMillis,
- *                      int interruptible);
- *    int CommOS_Wait(CommOSWaitQueue *wq,
- *                    CommOSWaitConditionFunc func,
- *                    void *funcArg1,
- *                    void *funcArg2,
- *                    unsigned long long *timeoutMillis);
- *    int CommOS_WaitUninterruptible(CommOSWaitQueue *wq,
- *                                   CommOSWaitConditionFunc func,
- *                                   void *funcArg1,
- *                                   void *funcArg2,
- *                                   unsigned long long *timeoutMillis);
- *    void CommOS_WakeUp(CommOSWaitQueue *wq);
- *    void *CommOS_KmallocNoSleep(unsigned int size);
- *    void *CommOS_Kmalloc(unsigned int size);
- *    void CommOS_Kfree(void *arg);
- *    void CommOS_Yield(void);
- *    unsigned long long CommOS_GetCurrentMillis(void);
- *    void CommOS_ListInit(CommOSList *list);
- *    int CommOS_ListEmpty(CommOSList *list);
- *    void CommOS_ListAdd(CommOSList *list, CommOSList *listElem);
- *    void CommOS_ListAddTail(CommOSList *list, CommOSList *listElem);
- *    void CommOS_ListDel(CommOSList *listElem);
- *    Macros:
- *       CommOS_ListForEach(*list, *item, itemListFieldName);
- *       CommOS_ListForEachSafe(*list, *item, *tmp, itemListFieldName);
- *    void CommOS_ListSplice(CommOSList *list, CommOSList *listToAdd);
- *    void CommOS_ListSpliceTail(CommOSList *list, CommOSList *listToAdd);
- *    CommOSModule CommOS_ModuleSelf(void);
- *    int CommOS_ModuleGet(CommOSModule module);
- *    void CommOS_ModulePut(CommOSModule module);
- *    void CommOS_MemBarrier(void);
- *
- * These cannot be defined here: a) non-pointer type definitions need size
- * information, and b) functions may or may not be inlined, or macros may
- * be used instead.
- */
-
-
-#ifdef __linux__
-#include "comm_os_linux.h"
-#else
-#error "Unsupported OS"
-#endif
-
-/* Functions to start and stop the dispatch and aio kernel threads. */
-void CommOS_StopIO(void);
-void CommOS_ScheduleDisp(void);
-void CommOS_InitWork(CommOSWork *work, CommOSWorkFunc func);
-int CommOS_ScheduleAIOWork(CommOSWork *work);
-void CommOS_FlushAIOWork(CommOSWork *work);
-
-/*
- * Starts the dispatch work queue and, when aioTaskName is non-NULL, the AIO
- * work queue. Returns zero on success and -1 on failure.
- */
-int
-CommOS_StartIO(const char *dispatchTaskName,
-               CommOSDispatchFunc dispatchHandler,
-               unsigned int interval,
-               unsigned int maxCycles,
-               const char *aioTaskName);
-
-
-#endif /* _COMM_OS_H_ */
diff --git a/arch/arm/mvp/pvtcpkm/comm_os_linux.c b/arch/arm/mvp/pvtcpkm/comm_os_linux.c
deleted file mode 100644
index 61ce929..0000000
--- a/arch/arm/mvp/pvtcpkm/comm_os_linux.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server
- *
- * Copyright (C) 2010-2012 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; see the file COPYING. If not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#line 5
-
-/**
- * @file
- *
- * @brief Linux-specific functions/types.
- */ - -#include "comm_os.h" - -#define DISPATCH_MAX_CYCLES 8192 - -/* Type definitions */ - -typedef struct workqueue_struct CommOSWorkQueue; - - -/* Static data */ - -static volatile int running; -static int numCpus; -static CommOSWorkQueue *dispatchWQ; -static CommOSDispatchFunc dispatch; -static CommOSWork dispatchWorksNow[NR_CPUS]; -static CommOSWork dispatchWorks[NR_CPUS]; -static unsigned int dispatchInterval = 1; -static unsigned int dispatchMaxCycles = 2048; -static CommOSWorkQueue *aioWQ; - - -/** - * @brief Initializes a workqueue consisting of per-cpu kernel threads. - * @param name workqueue name - * @return workqueue handle if successful, NULL otherwise - */ - -static inline CommOSWorkQueue * -CreateWorkqueue(const char *name) -{ - return create_workqueue(name); -} - - -/** - * @brief Destroys a workqueue and stops its threads. - * @param[in,out] wq workqueue to destroy. - * @return workqueue handle is successful, NULL otherwise. - */ - -static inline void -DestroyWorkqueue(CommOSWorkQueue *wq) -{ - destroy_workqueue(wq); -} - - -/** - * @brief Force execution of a work item. - * @param[in,out] work work item to dequeue. - */ - -static inline void -FlushDelayedWork(CommOSWork *work) -{ - flush_delayed_work(work); -} - - -/** - * @brief Enqueue a work item to a workqueue for execution on a given cpu - * and after the specified interval. - * @param cpu cpu number. If negative, work item is enqueued on current cpu. - * @param[in,out] wq target work queue. - * @param[in,out] work work item to enqueue. - * @param jif delay interval. - * @return zero if successful, non-zero otherwise. - */ - -static inline int -QueueDelayedWorkOn(int cpu, - CommOSWorkQueue *wq, - CommOSWork *work, - unsigned long jif) -{ - if (cpu < 0) { - return !queue_delayed_work(wq, work, jif) ? -1 : 0; - } else { - return !queue_delayed_work_on(cpu, wq, work, jif) ? 
-1 : 0; - } -} - - -/** - * @brief Enqueues a work item to a workqueue for execution on the current cpu - * and after the specified interval. - * @param[in,out] wq target work queue. - * @param[in,out] work work item to enqueue. - * @param jif delay interval. - * @return zero if successful, non-zero otherwise. - */ - -static inline int -QueueDelayedWork(CommOSWorkQueue *wq, - CommOSWork *work, - unsigned long jif) -{ - return QueueDelayedWorkOn(-1, wq, work, jif); -} - - -/** - * @brief Cancels a queued delayed work item and synchronizes with its - * completion. - * @param[in,out] work work item to cancel - */ - -static inline void -WaitForDelayedWork(CommOSWork *work) -{ - cancel_delayed_work_sync(work); -} - - -/** - * @brief Discards work items queued to the specified workqueue. - * @param[in,out] wq work queue to flush. - */ - -static inline void -FlushWorkqueue(CommOSWorkQueue *wq) -{ - flush_workqueue(wq); -} - - -/** - * @brief Schedules dispatcher threads for immediate execution. - */ - -void -CommOS_ScheduleDisp(void) -{ - CommOSWork *work = &dispatchWorksNow[get_cpu()]; - - put_cpu(); - if (running) { - QueueDelayedWork(dispatchWQ, work, 0); - } -} - - -/** - * @brief Default delayed work callback function implementation. - * Calls the input function specified at initialization. - * @param[in,out] work work item. - */ - -static void -DispatchWrapper(CommOSWork *work) -{ - unsigned int misses; - - for (misses = 0; running && (misses < dispatchMaxCycles); ) { - /* We run for at most dispatchMaxCycles worth of channel no-ops. */ - - if (!dispatch()) { - /* No useful work was done, on any of the channels. */ - - misses++; - if ((misses % 32) == 0) { - CommOS_Yield(); - } - } else { - misses = 0; - } - } - - if (running && - (work >= &dispatchWorks[0]) && - (work <= &dispatchWorks[NR_CPUS - 1])) { - /* - * If still running _and_ this was a regular, time-based run, then - * re-arm the timer. 
- */ - - QueueDelayedWork(dispatchWQ, work, dispatchInterval); - } -} - - -/** - * @brief Initializes work item with specified callback function. - * @param[in,out] work work queue to initialize. - * @param func work item to initialize the queue with. - */ - -void -CommOS_InitWork(CommOSWork *work, - CommOSWorkFunc func) -{ - INIT_DELAYED_WORK(work, (work_func_t)func); -} - - -/** - * @brief Flush execution of a work item - * @param{in,out] work work item to dequeue - */ -void -CommOS_FlushAIOWork(CommOSWork *work) -{ - if (aioWQ && work) { - FlushDelayedWork(work); - } -} - - -/** - * @brief Queue a work item to the AIO workqueue. - * @param[in,out] work work item to enqueue. - * @return zero if work enqueued, non-zero otherwise. - */ - -int -CommOS_ScheduleAIOWork(CommOSWork *work) -{ - if (running && aioWQ && work) { - return QueueDelayedWork(aioWQ, work, 0); - } - return -1; -} - - -/** - * @brief Initializes the base IO system. - * @param dispatchTaskName dispatch thread(s) name. - * @param dispatchFunc dispatch function. - * @param intervalMillis periodic interval in milliseconds to call dispatch. - * The floor is 1 jiffy, regardless of how small intervalMillis is - * @param maxCycles number of cycles to do adaptive polling before scheduling. - * The maximum number of cycles is DISPATCH_MAX_CYCLES. - * @param aioTaskName AIO thread(s) name. If NULL, AIO threads aren't started. - * @return zero is successful, -1 otherwise. - * @sideeffects Dispatch threads, and if applicable, AIO threads are started. - */ - -int -CommOS_StartIO(const char *dispatchTaskName, // IN - CommOSDispatchFunc dispatchFunc, // IN - unsigned int intervalMillis, // IN - unsigned int maxCycles, // IN - const char *aioTaskName) // IN -{ - int rc; - int cpu; - - if (running) { - CommOS_Debug(("%s: I/O tasks already running.\n", __FUNCTION__)); - return 0; - } - - /* - * OK, let's test the handler against NULL. 
Though, the whole concept - * of checking for NULL pointers, outside cases where NULL is meaningful - * to the implementation, is relatively useless: garbage, random pointers - * rarely happen to be all-zeros. - */ - - if (!dispatchFunc) { - CommOS_Log(("%s: a NULL Dispatch handler was passed.\n", __FUNCTION__)); - return -1; - } - dispatch = dispatchFunc; - - if (intervalMillis == 0) { - intervalMillis = 4; - } - if ((dispatchInterval = msecs_to_jiffies(intervalMillis)) < 1) { - dispatchInterval = 1; - } - if (maxCycles > DISPATCH_MAX_CYCLES) { - dispatchMaxCycles = DISPATCH_MAX_CYCLES; - } else if (maxCycles > 0) { - dispatchMaxCycles = maxCycles; - } - CommOS_Debug(("%s: Interval millis %u (jif:%u).\n", __FUNCTION__, - intervalMillis, dispatchInterval)); - CommOS_Debug(("%s: Max cycles %u.\n", __FUNCTION__, dispatchMaxCycles)); - - numCpus = num_present_cpus(); - dispatchWQ = CreateWorkqueue(dispatchTaskName); - if (!dispatchWQ) { - CommOS_Log(("%s: Couldn't create %s task(s).\n", __FUNCTION__, - dispatchTaskName)); - return -1; - } - - if (aioTaskName) { - aioWQ = CreateWorkqueue(aioTaskName); - if (!aioWQ) { - CommOS_Log(("%s: Couldn't create %s task(s).\n", __FUNCTION__, - aioTaskName)); - DestroyWorkqueue(dispatchWQ); - return -1; - } - } else { - aioWQ = NULL; - } - - running = 1; - for (cpu = 0; cpu < numCpus; cpu++) { - CommOS_InitWork(&dispatchWorksNow[cpu], DispatchWrapper); - CommOS_InitWork(&dispatchWorks[cpu], DispatchWrapper); - rc = QueueDelayedWorkOn(cpu, dispatchWQ, - &dispatchWorks[cpu], - dispatchInterval); - if (rc != 0) { - CommOS_StopIO(); - return -1; - } - } - CommOS_Log(("%s: Created I/O task(s) successfully.\n", __FUNCTION__)); - return 0; -} - - -/** - * @brief Stops the base IO system. - * @sideeffects Dispatch threads, and if applicable, AIO threads are stopped. 
- */ - -void -CommOS_StopIO(void) -{ - int cpu; - - if (running) { - running = 0; - if (aioWQ) { - FlushWorkqueue(aioWQ); - DestroyWorkqueue(aioWQ); - aioWQ = NULL; - } - FlushWorkqueue(dispatchWQ); - for (cpu = 0; cpu < numCpus; cpu++) { - WaitForDelayedWork(&dispatchWorksNow[cpu]); - WaitForDelayedWork(&dispatchWorks[cpu]); - } - DestroyWorkqueue(dispatchWQ); - dispatchWQ = NULL; - CommOS_Log(("%s: I/O tasks stopped.\n", __FUNCTION__)); - } -} diff --git a/arch/arm/mvp/pvtcpkm/comm_os_linux.h b/arch/arm/mvp/pvtcpkm/comm_os_linux.h deleted file mode 100644 index 81ee9d1..0000000 --- a/arch/arm/mvp/pvtcpkm/comm_os_linux.h +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- */
-#line 5
-
-/**
- * @file
- *
- * @brief Contains linux-specific type definitions and function declarations
- */
-
-#ifndef _COMM_OS_LINUX_H_
-#define _COMM_OS_LINUX_H_
-
-#include <linux/types.h>
-#include <linux/version.h>
-
-/*
- * NOTE(review): the file banner says "2.6.32 and later" while this check
- * only rejects kernels older than 2.6.20 -- confirm which one is intended.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-#error "Kernel versions lower than 2.6.20 are not supported"
-#endif
-
-#include <linux/kernel.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-
-/*
- * Type definitions.
- */
-
-typedef atomic_t CommOSAtomic;
-typedef spinlock_t CommOSSpinlock;
-typedef struct mutex CommOSMutex;
-typedef wait_queue_head_t CommOSWaitQueue;
-typedef struct delayed_work CommOSWork;
-typedef void (*CommOSWorkFunc)(CommOSWork *work);
-typedef struct list_head CommOSList;
-typedef struct module *CommOSModule;
-
-
-/*
- * Initializers.
- */
-
-#define CommOSSpinlock_Define DEFINE_SPINLOCK
-
-
-/* Common back end of the logging macros below: printk at KERN_INFO level. */
-#define COMM_OS_DOLOG(...) printk(KERN_INFO __VA_ARGS__)
-
-
-/**
- * @brief Logs given arguments in debug builds.
- *      The argument list must carry its own parentheses, i.e.,
- *      CommOS_Debug(("fmt", ...)), because the macro expands to
- *      "COMM_OS_DOLOG args". Expands to nothing unless COMM_OS_DEBUG is
- *      defined.
- */
-
-#if defined(COMM_OS_DEBUG)
-   #define CommOS_Debug(args) COMM_OS_DOLOG args
-#else
-   #define CommOS_Debug(args)
-#endif
-
-
-/**
- * @brief Logs given arguments.
- *      As with CommOS_Debug, the argument list must carry its own
- *      parentheses: CommOS_Log(("fmt", ...)).
- */
-
-#define CommOS_Log(args) COMM_OS_DOLOG args
-
-
-/**
- * @brief Logs function name and location.
- */
-
-#if defined(COMM_OS_TRACE)
-#define TRACE(ptr) \
-   do { \
-      CommOS_Debug(("%p:%s: at [%s:%d] with arg ptr [0x%p].\n", current, \
-                    __FUNCTION__, __FILE__, __LINE__, (ptr))); \
-   } while (0)
-#else
-#define TRACE(ptr)
-#endif
-
-
-/**
- * @brief Write atomic variable
- * @param[in,out] atomic variable to write
- * @param val new value
- */
-
-static inline void
-CommOS_WriteAtomic(CommOSAtomic *atomic,
-                   int val)
-{
-   atomic_set(atomic, val);
-}
-
-
-/**
- * @brief Reads atomic variable
- * @param atomic variable to read
- * @return value
- */
-
-static inline int
-CommOS_ReadAtomic(CommOSAtomic *atomic)
-{
-   return atomic_read(atomic);
-}
-
-
-/**
- * @brief Atomically add value to atomic variable, return new value.
- * @param[in,out] atomic variable
- * @param val value to add
- * @return new value
- */
-
-static inline int
-CommOS_AddReturnAtomic(CommOSAtomic *atomic,
-                       int val)
-{
-   return atomic_add_return(val, atomic);
-}
-
-
-/**
- * @brief Atomically subtract value from atomic variable, return new value.
- * @param[in,out] atomic variable
- * @param val value to subtract
- * @return new value
- */
-
-static inline int
-CommOS_SubReturnAtomic(CommOSAtomic *atomic,
-                       int val)
-{
-   return atomic_sub_return(val, atomic);
-}
-
-
-/**
- * @brief Initializes a given lock.
- * @param[in,out] lock lock to initialize
- */
-
-static inline void
-CommOS_SpinlockInit(CommOSSpinlock *lock)
-{
-   spin_lock_init(lock);
-}
-
-
-/**
- * @brief Locks given lock and disables bottom half processing.
- * @param[in,out] lock lock to lock
- */
-
-static inline void
-CommOS_SpinLockBH(CommOSSpinlock *lock)
-{
-   spin_lock_bh(lock);
-}
-
-
-/**
- * @brief Attempts to lock the given lock and disable BH processing.
- * @param[in,out] lock lock to lock
- * @return zero if successful, non-zero otherwise
- */
-
-static inline int
-CommOS_SpinTrylockBH(CommOSSpinlock *lock)
-{
-   /* Result of spin_trylock_bh() is negated to match the zero-on-success rule. */
-   return !spin_trylock_bh(lock);
-}
-
-
-/**
- * @brief Unlocks given lock and re-enables BH processing.
- * @param[in,out] lock lock to unlock
- */
-
-static inline void
-CommOS_SpinUnlockBH(CommOSSpinlock *lock)
-{
-   spin_unlock_bh(lock);
-}
-
-
-/**
- * @brief Locks the given lock.
- * @param[in,out] lock lock to lock
- */
-
-static inline void
-CommOS_SpinLock(CommOSSpinlock *lock)
-{
-   spin_lock(lock);
-}
-
-
-/**
- * @brief Attempts to lock the given lock.
- * @param[in,out] lock lock to try-lock
- * @return zero if successful, non-zero otherwise
- */
-
-static inline int
-CommOS_SpinTrylock(CommOSSpinlock *lock)
-{
-   /* Result of spin_trylock() is negated to match the zero-on-success rule. */
-   return !spin_trylock(lock);
-}
-
-
-/**
- * @brief Unlocks given lock.
- * @param[in,out] lock lock to unlock
- */
-
-static inline void
-CommOS_SpinUnlock(CommOSSpinlock *lock)
-{
-   spin_unlock(lock);
-}
-
-
-/**
- * @brief Initializes given mutex.
- * @param[in,out] mutex mutex to initialize
- */
-
-static inline void
-CommOS_MutexInit(CommOSMutex *mutex)
-{
-   mutex_init(mutex);
-}
-
-
-/**
- * @brief Acquires mutex.
- * @param[in,out] mutex mutex to lock
- * @return zero if successful, non-zero otherwise (interrupted)
- */
-
-static inline int
-CommOS_MutexLock(CommOSMutex *mutex)
-{
-   return mutex_lock_interruptible(mutex);
-}
-
-
-/**
- * @brief Acquires mutex in uninterruptible mode.
- * @param[in,out] mutex mutex to lock
- */
-
-static inline void
-CommOS_MutexLockUninterruptible(CommOSMutex *mutex)
-{
-   mutex_lock(mutex);
-}
-
-
-/**
- * @brief Attempts to acquire given mutex.
- * @param[in,out] mutex mutex to try-lock
- * @return zero if successful, non-zero otherwise
- */
-
-static inline int
-CommOS_MutexTrylock(CommOSMutex *mutex)
-{
-   /* Result of mutex_trylock() is negated to match the zero-on-success rule. */
-   return !mutex_trylock(mutex);
-}
-
-
-/**
- * @brief Releases a given mutex.
- * @param[in,out] mutex mutex to unlock
- */
-
-static inline void
-CommOS_MutexUnlock(CommOSMutex *mutex)
-{
-   mutex_unlock(mutex);
-}
-
-
-/**
- * @brief Initializes a wait queue.
- * @param[in,out] wq workqueue to initialize - */ - -static inline void -CommOS_WaitQueueInit(CommOSWaitQueue *wq) -{ - init_waitqueue_head(wq); -} - - -/** - * @brief Puts the caller on a wait queue until either of the following occurs: - * - the condition function (predicate) evaluates to TRUE - * - the specified timeout interval elapsed - * - a signal is pending - * @param[in,out] wq wait queue to put item on - * @param cond predicate to test - * @param condArg1 argument 1 for cond - * @param condArg2 argument 2 for cond - * @param[in,out] timeoutMillis timeout interval in milliseconds - * @param interruptible enable/disable signal pending check - * @return 1 if condition was met - * 0 if the timeout interval elapsed - * <0, if a signal is pending or other error set by condition - * @sideeffect timeoutMillis is updated to time remaining - */ - -static inline int -CommOS_DoWait(CommOSWaitQueue *wq, - CommOSWaitConditionFunc cond, - void *condArg1, - void *condArg2, - unsigned long long *timeoutMillis, - int interruptible) -{ - int rc; - DEFINE_WAIT(wait); - long timeout; -#if defined(COMM_OS_LINUX_WAIT_WORKAROUND) - long tmpTimeout; - long retTimeout; - const unsigned int interval = 50; -#endif - - if (!timeoutMillis) { - return -1; - } - if ((rc = cond(condArg1, condArg2)) != 0) { - return rc; - } - -#if defined(COMM_OS_LINUX_WAIT_WORKAROUND) - timeout = msecs_to_jiffies(interval < *timeoutMillis ? 
- interval : (unsigned int)*timeoutMillis); - retTimeout = msecs_to_jiffies((unsigned int)(*timeoutMillis)); - - for (; retTimeout >= 0; ) { - prepare_to_wait(wq, &wait, - (interruptible?TASK_INTERRUPTIBLE:TASK_UNINTERRUPTIBLE)); - if ((rc = cond(condArg1, condArg2))) { - break; - } - if (interruptible && signal_pending(current)) { - rc = -EINTR; - break; - } - if ((tmpTimeout = schedule_timeout(timeout))) { - retTimeout -= (timeout - tmpTimeout); - } else { - retTimeout -= timeout; - } - if (retTimeout < 0) { - retTimeout = 0; - } - } - finish_wait(wq, &wait); - if (rc == 0) { - rc = cond(condArg1, condArg2); - if (rc && (retTimeout == 0)) { - retTimeout = 1; - } - } - *timeoutMillis = (unsigned long long)jiffies_to_msecs(retTimeout); -#else // !defined(COMM_OS_LINUX_WAIT_WORKAROUND) - timeout = msecs_to_jiffies((unsigned int)(*timeoutMillis)); - - for (;;) { - prepare_to_wait(wq, &wait, - (interruptible?TASK_INTERRUPTIBLE:TASK_UNINTERRUPTIBLE)); - if ((rc = cond(condArg1, condArg2)) != 0) { - break; - } - if (interruptible && signal_pending(current)) { - rc = -EINTR; - break; - } - if ((timeout = schedule_timeout(timeout)) == 0) { - rc = 0; - break; - } - } - finish_wait(wq, &wait); - if (rc == 0) { - rc = cond(condArg1, condArg2); - if (rc && (timeout == 0)) { - timeout = 1; - } - } - *timeoutMillis = (unsigned long long)jiffies_to_msecs(timeout); -#endif - - return rc; -} - - -/** - * @brief Puts the caller on a wait queue until either of the following occurs: - * - the condition function (predicate) evaluates to TRUE - * - the specified timeout interval elapsed - * - a signal is pending - * @param[in,out] wq wait queue to put item on - * @param cond predicate to test - * @param condArg1 argument 1 for cond - * @param condArg2 argument 2 for cond - * @param[in,out] timeoutMillis timeout interval in milliseconds - * @return 1 if condition was met - * 0 if the timeout interval elapsed - * <0, if a signal is pending or other error set by condition - * @sideeffect 
timeoutMillis is updated to time remaining
- */
-
-static inline int
-CommOS_Wait(CommOSWaitQueue *wq,
-            CommOSWaitConditionFunc cond,
-            void *condArg1,
-            void *condArg2,
-            unsigned long long *timeoutMillis)
-{
-   /* Last argument: 1 selects the interruptible (signal-aware) wait. */
-   return CommOS_DoWait(wq, cond, condArg1, condArg2, timeoutMillis, 1);
-}
-
-
-/**
- * @brief Puts the caller on a wait queue until either of the following occurs:
- *      - the condition function (predicate) evaluates to TRUE
- *      - the specified timeout interval elapsed
- * @param[in,out] wq wait queue to put item on
- * @param cond predicate to test
- * @param condArg1 argument 1 for cond
- * @param condArg2 argument 2 for cond
- * @param[in,out] timeoutMillis timeout interval in milliseconds
- * @return 1 if condition was met
- *         0 if the timeout interval elapsed
- *         <0, error set by condition
- * @sideeffect timeoutMillis is updated to time remaining
- */
-
-static inline int
-CommOS_WaitUninterruptible(CommOSWaitQueue *wq,
-                           CommOSWaitConditionFunc cond,
-                           void *condArg1,
-                           void *condArg2,
-                           unsigned long long *timeoutMillis)
-{
-   /* Last argument: 0 selects the uninterruptible wait. */
-   return CommOS_DoWait(wq, cond, condArg1, condArg2, timeoutMillis, 0);
-}
-
-
-/**
- * @brief Wakes up task(s) waiting on the given wait queue.
- * @param[in,out] wq wait queue.
- */
-
-static inline void
-CommOS_WakeUp(CommOSWaitQueue *wq)
-{
-   wake_up(wq);
-}
-
-
-/**
- * @brief Allocates kernel memory of specified size; does not sleep.
- * @param size size to allocate.
- * @return Address of allocated memory or NULL if the allocation fails.
- */
-
-static inline void *
-CommOS_KmallocNoSleep(unsigned int size)
-{
-   return kmalloc(size, GFP_ATOMIC);
-}
-
-
-/**
- * @brief Allocates kernel memory of specified size; may sleep.
- * @param size size to allocate.
- * @return Address of allocated memory or NULL if the allocation fails.
- */
-
-static inline void *
-CommOS_Kmalloc(unsigned int size)
-{
-   return kmalloc(size, GFP_KERNEL);
-}
-
-
-/**
- * @brief Frees previously allocated kernel memory.
- *      A NULL pointer is accepted and ignored.
- * @param obj object to free.
- */
-
-static inline void
-CommOS_Kfree(void *obj)
-{
-   if (obj) {
-      kfree(obj);
-   }
-}
-
-
-/**
- * @brief Yields the current cpu to other runnable tasks.
- */
-
-static inline void
-CommOS_Yield(void)
-{
-   cond_resched();
-}
-
-
-/**
- * @brief Gets the current time in milliseconds.
- *      The value is derived from the jiffies counter, so it is only suitable
- *      for measuring intervals.
- * @return Current time in milliseconds, with precision of at most one tick.
- */
-
-static inline unsigned long long
-CommOS_GetCurrentMillis(void)
-{
-   return (unsigned long long)jiffies_to_msecs(jiffies);
-}
-
-
-/**
- * @brief Initializes given list.
- * @param list list to initialize.
- */
-
-static inline void
-CommOS_ListInit(CommOSList *list)
-{
-   INIT_LIST_HEAD(list);
-}
-
-
-/**
- * @brief Tests if list is empty.
- * @param list list to test.
- * @return non-zero if empty, zero otherwise.
- */
-
-#define CommOS_ListEmpty(list) list_empty((list))
-
-
-/**
- * @brief Adds given element to beginning of list.
- * @param list list to add to.
- * @param elem element to add.
- */
-
-#define CommOS_ListAdd(list, elem) list_add((elem), (list))
-
-
-/**
- * @brief Adds given element to end of list.
- * @param list list to add to.
- * @param elem element to add.
- */
-
-#define CommOS_ListAddTail(list, elem) list_add_tail((elem), (list))
-
-
-/**
- * @brief Deletes given element from its list.
- *      The element is re-initialized as an empty list head after removal.
- * @param elem element to delete.
- */
-
-#define CommOS_ListDel(elem) \
-   do { \
-      list_del((elem)); \
-      INIT_LIST_HEAD((elem)); \
-   } while (0)
-
-
-/**
- * @brief Iterates over a list.
- * @param list list to iterate over.
- * @param[out] item stores next element.
- * @param itemListFieldName name in the item structure storing the list head.
- */
-
-#define CommOS_ListForEach(list, item, itemListFieldName) \
-   list_for_each_entry((item), (list), itemListFieldName)
-
-
-/**
- * @brief Iterates safely over a list.
- * @param list list to iterate over.
- * @param[out] item stores next element. May be deleted in the loop.
- * @param[out] tmpItem saves iteration element.
- * @param itemListFieldName name in the item structure storing the list head.
- */
-
-#define CommOS_ListForEachSafe(list, item, tmpItem, itemListFieldName) \
-   list_for_each_entry_safe((item), (tmpItem), (list), itemListFieldName)
-
-
-/**
- * @brief Combines two lists, adds second list to beginning of first one.
- * @param list list to add to.
- * @param list2 list to add.
- */
-
-#define CommOS_ListSplice(list, list2) list_splice((list2), (list))
-
-
-/**
- * @brief Combines two lists, adds second list to end of first one.
- * @param list list to add to.
- * @param list2 list to add.
- */
-
-#define CommOS_ListSpliceTail(list, list2) list_splice_tail((list2), (list))
-
-
-/**
- * @brief Gets current module handle.
- * @return module handle.
- */
-
-static inline CommOSModule
-CommOS_ModuleSelf(void)
-{
-   return THIS_MODULE;
-}
-
-
-/**
- * @brief Retains module.
- *      A NULL module handle is treated as success.
- * @param[in,out] module to retain.
- * @return zero if successful, non-zero otherwise.
- */
-
-static inline int
-CommOS_ModuleGet(CommOSModule module)
-{
-   int rc = 0;
-
-   if (!module) {
-      goto out;
-   }
-   if (!try_module_get(module)) {
-      rc = -1;
-   }
-
-out:
-   return rc;
-}
-
-
-/**
- * @brief Releases module.
- *      A NULL module handle is ignored.
- * @param[in,out] module to release.
- */
-
-static inline void
-CommOS_ModulePut(CommOSModule module)
-{
-   if (module) {
-      module_put(module);
-   }
-}
-
-
-/**
- * @brief Inserts r/w memory barrier.
- *      Invoked as CommOS_MemBarrier(); expands to smp_mb().
- */
-
-#define CommOS_MemBarrier smp_mb
-
-#endif /* _COMM_OS_LINUX_H_ */
diff --git a/arch/arm/mvp/pvtcpkm/comm_os_mod_linux.c b/arch/arm/mvp/pvtcpkm/comm_os_mod_linux.c
deleted file mode 100644
index e196108..0000000
--- a/arch/arm/mvp/pvtcpkm/comm_os_mod_linux.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server
- *
- * Copyright (C) 2010-2012 VMware, Inc. All rights reserved.
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Linux-specific module loading, unloading functions. - */ - -#include "comm_os.h" -#include "comm_os_mod_ver.h" - -#include <linux/moduleparam.h> - - -/* Module parameters -- passed as one 'name=value'-list string. */ - -static char modParams[256]; -module_param_string(COMM_OS_MOD_SHORT_NAME, modParams, sizeof modParams, 0644); - - -/** - * @brief Module initialization entry point. Calls the commOSModInit - * function pointer to perform upper layer initialization. - * @return zero if successful, non-zero otherwise. - */ - -static int __init -ModInit(void) -{ - int rc; - - if (!commOSModInit) { - CommOS_Log(("%s: Can't find \'init\' function for module \'" \ - COMM_OS_MOD_SHORT_NAME_STRING "\'.\n", __FUNCTION__)); - return -1; - } - - CommOS_Debug(("%s: Module parameters: [%s].\n", __FUNCTION__, modParams)); - - rc = (*commOSModInit)(modParams); - if (rc == 0) { - CommOS_Log(("%s: Module \'" COMM_OS_MOD_SHORT_NAME_STRING \ - "\' has been successfully initialized.\n", __FUNCTION__)); - } else { - CommOS_Log(("%s: Module \'" COMM_OS_MOD_SHORT_NAME_STRING \ - "\' could not be initialized [%d].\n", __FUNCTION__, rc)); - } - - return rc > 0 ? -rc : rc; -} - - -/** - * @brief Module exit function. 
Calls the commOSModExit function pointer - * to perform upper layer cleanup. - */ - -static void __exit -ModExit(void) -{ - if (!commOSModExit) { - CommOS_Log(("%s: Can't find \'fini\' function for module \'" \ - COMM_OS_MOD_SHORT_NAME_STRING "\'.\n", __FUNCTION__)); - return; - } - - (*commOSModExit)(); - CommOS_Log(("%s: Module \'" COMM_OS_MOD_SHORT_NAME_STRING \ - "\' has been stopped.\n", __FUNCTION__)); -} - - -module_init(ModInit); -module_exit(ModExit); - -/* Module information. */ -MODULE_AUTHOR("VMware, Inc."); -MODULE_DESCRIPTION(COMM_OS_MOD_NAME_STRING); -MODULE_VERSION(COMM_OS_MOD_VERSION_STRING); -MODULE_LICENSE("GPL v2"); -/* - * Starting with SLE10sp2, Novell requires that IHVs sign a support agreement - * with them and mark their kernel modules as externally supported via a - * change to the module header. If this isn't done, the module will not load - * by default (i.e., neither mkinitrd nor modprobe will accept it). - */ -MODULE_INFO(supported, "external"); diff --git a/arch/arm/mvp/pvtcpkm/comm_os_mod_ver.h b/arch/arm/mvp/pvtcpkm/comm_os_mod_ver.h deleted file mode 100644 index 5e14c62..0000000 --- a/arch/arm/mvp/pvtcpkm/comm_os_mod_ver.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. 
If not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#line 5
-
-/**
- * @file
- *
- * @brief Version definitions for the pvTCP module.
- */
-
-#ifndef _COMM_OS_MOD_VER_H_
-#define _COMM_OS_MOD_VER_H_
-
-/* Long description, short name as a bare token, and short name as a string. */
-#define COMM_OS_MOD_NAME_STRING "VMware paravirtualized tcp/ip module"
-#define COMM_OS_MOD_SHORT_NAME pvtcp
-#define COMM_OS_MOD_SHORT_NAME_STRING "pvtcp"
-
-/* The same version in dotted, comma-separated and string forms. */
-#define COMM_OS_MOD_VERSION 1.0.0.0
-#define COMM_OS_MOD_VERSION_COMMAS 1,0,0,0
-#define COMM_OS_MOD_VERSION_STRING "1.0.0.0"
-
-#endif /* _COMM_OS_MOD_VER_H_ */
diff --git a/arch/arm/mvp/pvtcpkm/comm_svc.h b/arch/arm/mvp/pvtcpkm/comm_svc.h
deleted file mode 100644
index 784ec76..0000000
--- a/arch/arm/mvp/pvtcpkm/comm_svc.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server
- *
- * Copyright (C) 2010-2012 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; see the file COPYING. If not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#line 5
-
-/**
- * @file
- *
- * @brief Communication functions exported by the comm_rt module.
- */ - -#ifndef _COMM_SVC_H_ -#define _COMM_SVC_H_ - -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_PV -#define INCLUDE_ALLOW_GPL -#include "include_check.h" - -#include "comm.h" - -int CommSvc_RegisterImpl(const CommImpl *impl); -void CommSvc_UnregisterImpl(const CommImpl *impl); -int CommSvc_Zombify(CommChannel channel, int inBH); -int CommSvc_IsActive(CommChannel channel); -CommTranspInitArgs CommSvc_GetTranspInitArgs(CommChannel channel); -void *CommSvc_GetState(CommChannel channel); -void CommSvc_Put(CommChannel channel); -void CommSvc_DispatchUnlock(CommChannel channel); -int CommSvc_Lock(CommChannel channel); -void CommSvc_Unlock(CommChannel channel); -int CommSvc_ScheduleAIOWork(CommOSWork *work); - -int -CommSvc_Alloc(const CommTranspInitArgs *transpArgs, - const CommImpl *impl, - int inBH, - CommChannel *newChannel); - -int -CommSvc_Write(CommChannel channel, - const CommPacket *packet, - unsigned long long *timeoutMillis); - -int -CommSvc_WriteVec(CommChannel channel, - const CommPacket *packet, - struct kvec **vec, - unsigned int *vecLen, - unsigned long long *timeoutMillis, - unsigned int *iovOffset); - -unsigned int CommSvc_RequestInlineEvents(CommChannel channel); -unsigned int CommSvc_ReleaseInlineEvents(CommChannel channel); - -#endif // _COMM_SVC_H_ diff --git a/arch/arm/mvp/pvtcpkm/comm_transp.h b/arch/arm/mvp/pvtcpkm/comm_transp.h deleted file mode 100644 index c46f849..0000000 --- a/arch/arm/mvp/pvtcpkm/comm_transp.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Generic shared memory transport API. - */ - -#ifndef _COMM_TRANSP_H_ -#define _COMM_TRANSP_H_ - -#define INCLUDE_ALLOW_PV -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_MONITOR -#define INCLUDE_ALLOW_GPL -#include "include_check.h" - -/* - * Common shared memory identifier. - * External handle that makes sense to both hypervisor and guest. - */ - -#define COMM_TRANSP_ID_8_ANY ((unsigned char)-1) -#define COMM_TRANSP_ID_32_ANY ((unsigned int)-1) -#define COMM_TRANSP_ID_64_ANY ((unsigned long long)-1) - - -typedef struct CommTranspID { - union { - unsigned char d8[8]; - unsigned int d32[2]; - unsigned long long d64; - }; -} CommTranspID; - - -/* Basic initialization arguments. */ - -typedef enum CommTranspInitMode { - COMM_TRANSP_INIT_CREATE = 0x0, - COMM_TRANSP_INIT_ATTACH = 0x1 -} CommTranspInitMode; - -typedef struct CommTranspInitArgs { - unsigned int capacity; // Shared memory capacity. - unsigned int type; // Type / implementation using this area. - CommTranspID id; // ID (name) of shared memory area. - CommTranspInitMode mode; // Init mode (above). -} CommTranspInitArgs; - - -/** - * @brief Generate a type id from description (protocol) string. This function - * uses djb2, a string hashing algorithm by Dan Bernstein. 
- * (see http://www.cse.yorku.ca/~oz/hash.html) - * @param str string to hash - * @return 32-bit hash value - */ - -static inline unsigned int -CommTransp_GetType(const char *str) -{ - unsigned int hash = 5381; - int c; - - while ((c = *str++)) { - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - } - return hash; -} - -#endif // _COMM_TRANSP_H_ diff --git a/arch/arm/mvp/pvtcpkm/include_check.h b/arch/arm/mvp/pvtcpkm/include_check.h deleted file mode 100644 index 2eeafe7..0000000 --- a/arch/arm/mvp/pvtcpkm/include_check.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for Empty File Placeholder - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ diff --git a/arch/arm/mvp/pvtcpkm/pvtcp.c b/arch/arm/mvp/pvtcpkm/pvtcp.c deleted file mode 100644 index fdfb0d2..0000000 --- a/arch/arm/mvp/pvtcpkm/pvtcp.c +++ /dev/null @@ -1,587 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Pvtcp common code. - */ - -#include "pvtcp.h" - - -/* - * Operation table. - */ - -CommOperationFunc pvtcpOperations[] = { - [PVTCP_OP_FLOW] = PvtcpFlowOp, - [PVTCP_OP_IO] = PvtcpIoOp, - [PVTCP_OP_CREATE] = PvtcpCreateOp, - [PVTCP_OP_RELEASE] = PvtcpReleaseOp, - [PVTCP_OP_BIND] = PvtcpBindOp, - [PVTCP_OP_LISTEN] = PvtcpListenOp, - [PVTCP_OP_ACCEPT] = PvtcpAcceptOp, - [PVTCP_OP_CONNECT] = PvtcpConnectOp, - [PVTCP_OP_SHUTDOWN] = PvtcpShutdownOp, - [PVTCP_OP_SETSOCKOPT] = PvtcpSetSockOptOp, - [PVTCP_OP_GETSOCKOPT] = PvtcpGetSockOptOp, - [PVTCP_OP_IOCTL] = PvtcpIoctlOp, - [PVTCP_OP_INVALID] = NULL -}; - - -/* - * Implementation block. - */ - -CommImpl pvtcpImpl = { - .owner = NULL, - .checkArgs = PvtcpCheckArgs, - .stateCtor = PvtcpStateAlloc, - .stateDtor = PvtcpStateFree, - .dataAlloc = PvtcpBufAlloc, - .dataFree = PvtcpBufFree, - .operations = pvtcpOperations, - .closeNtf = PvtcpCloseNtf, - .closeNtfData = &pvtcpImpl, - .ntfCenterID = {{ - .d32[0] = 2U /* x86 host context (vmci, only). */, - .d32[1] = 10000 /* Default, not yet reserved, resource (vmci, only). */ - }} -}; - - -/* - * Version array. - */ - -const char *pvtcpVersions[] = { - [PVTCP_VERS_1_1] = PVTCP_COMM_IMPL_VERS_1_1, - [PVTCP_VERS_1_0] = PVTCP_COMM_IMPL_VERS_1_0 -}; - -const unsigned int pvtcpVersionsSize = - (sizeof pvtcpVersions / sizeof pvtcpVersions[0]); - - -/* - * Client (pv) channel to offload side. 
We choose to define it here, although - * it's only applicable to the pv implementation. The reason is that we can - * share a common close notification function which does the right thing - * depending on the channel configuration. - */ - -CommChannel pvtcpClientChannel; - - -/* - * Built-in state interfaces. - */ - -static PvtcpIfConf ifUnbound = { - .family = PVTCP_PF_UNBOUND -}; -const PvtcpIfConf *pvtcpIfUnbound = &ifUnbound; - -static PvtcpIfConf ifDeathRow = { - .family = PVTCP_PF_DEATH_ROW -}; -const PvtcpIfConf *pvtcpIfDeathRow = &ifDeathRow; - -static PvtcpIfConf ifLoopbackInet4 = { - .family = PVTCP_PF_LOOPBACK_INET4 -}; -const PvtcpIfConf *pvtcpIfLoopbackInet4 = &ifLoopbackInet4; - - -/* Functions */ - -/** - * @brief Checks if the IF configuration has reasonable values. - * @param conf configuration to check - * @return zero if successful, -1 otherwise - */ - -static int -IfCheck(const PvtcpIfConf *conf) -{ - if (!conf || - ((conf->family != PF_INET) && - (conf->family != PF_INET6) && - (conf->family != PVTCP_PF_UNBOUND) && - (conf->family != PVTCP_PF_DEATH_ROW) && - (conf->family != PVTCP_PF_LOOPBACK_INET4))) { - return -1; - } - - /** @todo Need more checks for IP/netmask format validity. */ - return 0; -} - - -/** - * @brief Checks if the IF has reasonable values, but restricts types to - * AF_INET and AF_INET6 - * @param conf IF to check - * @return zero if successful, -1 otherwise - */ - -static int -IfRestrictedCheck(const PvtcpIfConf *conf) -{ - if (IfCheck(conf) || - ((conf->family != PF_INET) && - (conf->family != PF_INET6))) { - return -1; - } - return 0; -} - - -/** - * @brief Finds a netif given a state and a configuration. The configuration - * must have already been checked. This function doesn't lock, so it - * should not be called when the state, or the netif for the passed - * configuration may be deleted. - * @param state state to look for. - * @param conf configuration to look for. - * @return netif matching configuration, or NULL. 
- */ - -PvtcpIf * -PvtcpStateFindIf(PvtcpState *state, - const PvtcpIfConf *conf) -{ - PvtcpIf *netif; - - if (!state) { - return NULL; - } - - if (conf->family == PVTCP_PF_UNBOUND) { - return &state->ifUnbound; - } - - if (conf->family == PVTCP_PF_DEATH_ROW) { - return &state->ifDeathRow; - } - - if (conf->family == PVTCP_PF_LOOPBACK_INET4) { - return &state->ifLoopbackInet4; - } - - CommOS_ListForEach(&state->ifList, netif, stateLink) { - if (netif->conf.family == conf->family) { - if ((conf->family == PF_INET && - !memcmp(&netif->conf.addr.in, &conf->addr.in, - sizeof conf->addr.in)) || - (conf->family == PF_INET6 && - !memcmp(&netif->conf.addr.in6, &conf->addr.in6, - sizeof conf->addr.in6))) { - return netif; - } - } - } - return NULL; -} - - -/** - * @brief Creates and initializes a new netif for a given channel and with - * the specified configuration. Death row and unbound netifs may not - * be added using this function. - * @param[in,out] channel channel to make a new netif in - * @param conf configuration to set netif to - * @return 0 if successful, -1 otherwise - * @sideeffect May allocate memory - */ - -int -PvtcpStateAddIf(CommChannel channel, - const PvtcpIfConf *conf) -{ - int rc = -1; - PvtcpState *state; - PvtcpIf *netif; - - if (!channel || IfRestrictedCheck(conf)) { - return rc; - } - - if (CommSvc_Lock(channel)) { - return rc; /* channel isn't active. */ - } - - state = CommSvc_GetState(channel); - if (!state) { - goto out; - } - - if (PvtcpStateFindIf(state, conf)) { - goto out; /* Already configured. 
*/ - } - - netif = CommOS_Kmalloc(sizeof *netif); - if (!netif) { - goto out; - } - - INIT_LIST_HEAD(&netif->stateLink); - INIT_LIST_HEAD(&netif->sockList); - netif->state = state; - netif->conf = *conf; - CommOS_ListAddTail(&state->ifList, &netif->stateLink); - rc = 0; - -out: - CommSvc_Unlock(channel); - return rc; -} - - -/** - * @brief Removes and potentially deallocates all sockets associated with the - * given netif and deallocates the latter. - * @param[in,out] netif netif to deallocate - * @sideeffect Closes sockets, deallocates memory - */ - -static void -IfFree(PvtcpIf *netif) -{ - PvtcpSock *pvsk; - PvtcpSock *tmp; - - if (netif) { - CommOS_ListForEachSafe(&netif->sockList, pvsk, tmp, ifLink) { - CommOS_ListDel(&pvsk->ifLink); - PvtcpReleaseSocket(pvsk); - } - if ((netif->conf.family != PVTCP_PF_UNBOUND) && - (netif->conf.family != PVTCP_PF_DEATH_ROW) && - (netif->conf.family != PVTCP_PF_LOOPBACK_INET4)) { - CommOS_ListDel(&netif->stateLink); - CommOS_Kfree(netif); - } - } -} - - -/** - * @brief Closes all sockets associated with, and deallocates the netif - * in the given channel and with the specified configuration. - * Death row and unbound netifs may not be removed using this function. - * @param[in,out] channel channel to remove from - * @param conf configuration specified - * @return zero if successful, error code otherwise - * @sideeffect Closes sockets, deallocates memory - */ - -void -PvtcpStateRemoveIf(CommChannel channel, - const PvtcpIfConf *conf) -{ - PvtcpState *state; - PvtcpIf *netif; - - if (!channel || IfRestrictedCheck(conf)) { - return; - } - - if (CommSvc_Lock(channel)) { - return; /* channel isn't active. */ - } - - state = CommSvc_GetState(channel); - if (state && (netif = PvtcpStateFindIf(state, conf))) { - if (netif->state == state) { - IfFree(netif); - } - } - - CommSvc_Unlock(channel); -} - - -/** - * @brief Adds a socket to an existing netif. If the socket is already on a - * different netif, it is removed from that netif. 
- * It locks the must-be-active channel. We use that lock to guard - * against concurrent removal of the netif. - * @param[in,out] channel channel to add to - * @param conf specified configuration - * @param[in,out] sock socket to add - * @return zero if successful, -1 otherwise - */ - -int -PvtcpStateAddSocket(CommChannel channel, - const PvtcpIfConf *conf, - PvtcpSock *sock) -{ - int rc = -1; - PvtcpState *state; - PvtcpIf *netif; - - if (!channel || !sock || (sock->channel != channel) || IfCheck(conf)) { - return rc; - } - - if (CommSvc_Lock(channel)) { - return rc; /* channel isn't active. */ - } - - state = CommSvc_GetState(channel); - if (!state) { - goto out; - } - - netif = PvtcpStateFindIf(state, conf); - if (!netif) { - goto out; - } - - CommOS_ListDel(&sock->ifLink); - sock->netif = netif; - CommOS_ListAddTail(&netif->sockList, &sock->ifLink); - rc = 0; - -out: - CommSvc_Unlock(channel); - return rc; -} - - -/** - * @brief Removes a socket from its netif. - * It locks the must-be-active channel. We use that lock to guard - * against concurrent removal of the netif. - * @param[in,out] channel channel to remove from - * @param[in,out] sock socket to remove - * @return zero if successful, -1 otherwise - */ - -int -PvtcpStateRemoveSocket(CommChannel channel, - PvtcpSock *sock) -{ - if (!channel || !sock || - (sock->channel && (sock->channel != channel))) { - return -1; - } - - if (CommSvc_Lock(channel)) { - return -1; /* channel isn't active. */ - } - - CommOS_ListDel(&sock->ifLink); - sock->channel = NULL; - sock->state = NULL; - sock->netif = NULL; - - CommSvc_Unlock(channel); - return 0; -} - - -/** - * @brief State constructor called when a channel is created. The netifs - * 'death row' and 'unbound' are always initialized. 
- * @param[in,out] channel channel to initialize - * @return pointer to a new state structure or NULL - * @sideeffect Allocates memory - */ - -void * -PvtcpStateAlloc(CommChannel channel) -{ - PvtcpState *state; - - state = CommOS_Kmalloc(sizeof *state); - if (state) { - state->channel = channel; - INIT_LIST_HEAD(&state->ifList); - - /* Initialize always-present netifs. */ - INIT_LIST_HEAD(&state->ifDeathRow.stateLink); /* Irrelevant */ - INIT_LIST_HEAD(&state->ifDeathRow.sockList); - state->ifDeathRow.state = state; - state->ifDeathRow.conf.family = PVTCP_PF_DEATH_ROW; - - INIT_LIST_HEAD(&state->ifUnbound.stateLink); /* Irrelevant */ - INIT_LIST_HEAD(&state->ifUnbound.sockList); - state->ifUnbound.state = state; - state->ifUnbound.conf.family = PVTCP_PF_UNBOUND; - - INIT_LIST_HEAD(&state->ifLoopbackInet4.stateLink); /* Irrelevant */ - INIT_LIST_HEAD(&state->ifLoopbackInet4.sockList); - state->ifLoopbackInet4.state = state; - state->ifLoopbackInet4.conf.family = PVTCP_PF_LOOPBACK_INET4; - - state->namespace = NULL; - state->mask = ((unsigned int)channel << 4) ^ (unsigned int)state; -#if defined(__linux__) - state->id = ((unsigned long long)random32() << 32) | - (unsigned long long)random32(); -#else - state->id = (unsigned long long)state; -#endif - } - return state; -} - - -/** - * @brief State destructor called when a channel is closed. - * The caller (Comm) guarantees proper locking. - * @param arg pointer to state structure - * @sideeffect Destroys all netifs and their sockets, deallocates memory - */ - -void -PvtcpStateFree(void *arg) -{ - PvtcpState *state = arg; - PvtcpIf *netif; - PvtcpIf *tmp; - - if (state) { - CommOS_ListForEachSafe(&state->ifList, netif, tmp, stateLink) { - IfFree(netif); - } - /* coverity[address_free] */ - IfFree(&state->ifLoopbackInet4); - /* coverity[address_free] */ - IfFree(&state->ifUnbound); - /* coverity[address_free] */ - IfFree(&state->ifDeathRow); - CommOS_Kfree(state); - } -} - - -/** - * @brief Checks transport arguments. 
- * @param transpArgs transport arguments. - * @return zero if successful, < 0 otherwise. - */ - -int -PvtcpCheckArgs(CommTranspInitArgs *transpArgs) -{ - int rc = -1; - const unsigned int minCapacity = - (PVTCP_SOCK_BUF_SIZE + sizeof(CommPacket)) * 2; - unsigned int versionIndex = pvtcpVersionsSize; - - if (transpArgs->capacity < minCapacity) { - return rc; - } - - while (versionIndex--) { - if (transpArgs->type == CommTransp_GetType(pvtcpVersions[versionIndex])) { - /* If a match, overwrite the hash with the actual version (index). */ - - transpArgs->type = versionIndex; - rc = 0; - break; - } - } - - return rc; -} - - -/** - * @brief Called after a channel is freed. - * @param ntfData callback data from implementation block. - * @param transpArgs transport arguments of closed channel. - * @param inBH whether called in bottom half. - */ - -void -PvtcpCloseNtf(void *ntfData, - const CommTranspInitArgs *transpArgs, - int inBH) -{ - CommImpl *impl = (CommImpl *)ntfData; - - pvtcpClientChannel = NULL; - CommOS_Log(("%s: Channel was reset!\n", __FUNCTION__)); - - /* - * If the impl. block owner is NULL, we're pv client: we attempt to - * reopen the channel in a few seconds. - */ - - if (impl && !impl->owner && !inBH) { - CommOS_Log(("%s: Attempting to re-initialize channel.\n", __FUNCTION__)); - impl->openAtMillis = CommOS_GetCurrentMillis(); - impl->openTimeoutAtMillis = - CommOS_GetCurrentMillis() + PVTCP_CHANNEL_OPEN_TIMEOUT; - if (CommSvc_Alloc(transpArgs, impl, inBH, &pvtcpClientChannel)) { - CommOS_Log(("%s: Failed to initialize channel!\n", __FUNCTION__)); - } - } -} - - -/** - * @brief Initializes the Pvtcp socket common fields. - * @param pvsk pvtcp socket. - * @param channel Comm channel this socket is associated with. - * @return 0 if successful, -1 otherwise. 
- */ - -int -PvtcpSockInit(PvtcpSock *pvsk, - CommChannel channel) -{ - PvtcpState *state; - int rc = -1; - - if (pvsk && channel && (state = CommSvc_GetState(channel))) { - /* Must _not_ zero out pvsk! */ - - CommOS_MutexInit(&pvsk->inLock); - CommOS_MutexInit(&pvsk->outLock); - CommOS_SpinlockInit(&pvsk->stateLock); - CommOS_ListInit(&pvsk->ifLink); - CommOS_InitWork(&pvsk->work, PvtcpProcessAIO); - pvsk->netif = NULL; - pvsk->state = state; - pvsk->stateID = state->id; - pvsk->channel = channel; - pvsk->peerSock = PVTCP_PEER_SOCK_NULL; - pvsk->peerSockSet = 0; - CommOS_WriteAtomic(&pvsk->deltaAckSize, - (1 << PVTCP_SOCK_SMALL_ACK_ORDER)); - CommOS_WriteAtomic(&pvsk->rcvdSize, 0); - CommOS_WriteAtomic(&pvsk->sentSize, 0); - CommOS_WriteAtomic(&pvsk->queueSize, 0); - CommOS_ListInit(&pvsk->queue); - pvsk->rpcReply = NULL; - pvsk->rpcStatus = 0; - pvsk->err = 0; - rc = 0; - } - return rc; -} diff --git a/arch/arm/mvp/pvtcpkm/pvtcp.h b/arch/arm/mvp/pvtcpkm/pvtcp.h deleted file mode 100644 index 7f4f2f5..0000000 --- a/arch/arm/mvp/pvtcpkm/pvtcp.h +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Pvtcp common APIs. 
- */ - -#ifndef _PVTCP_H_ -#define _PVTCP_H_ - -/* - * Pvtcp state store ipv4 and ipv6 address structures. - * Platform-specific headers where these are defined, must be included here. - * Implementation-related header files should not be included in this file. - * - * NOTE: Pvtcp is not an API and none of its functions are exported. - */ - -#if defined(__linux__) -#include <linux/in.h> -#include <linux/in6.h> -#else -#error "Unsupported OS." -#endif - -#include "comm_svc.h" - -/* Max time to wait for a channel to be created. */ -#define PVTCP_CHANNEL_OPEN_TIMEOUT 2000 - -/* Max payload size. Used to allocate offload per-cpu bounce buffers. */ -#define PVTCP_SOCK_BUF_SIZE (8 << 10) /* 8K */ - -#define PVTCP_SOCK_DGRAM_BUF_SIZE PVTCP_SOCK_BUF_SIZE -#define PVTCP_SOCK_STREAM_BUF_SIZE PVTCP_SOCK_BUF_SIZE - -/* Dgram payloads include a pseudo (udp/ip) header. */ -typedef struct PvtcpDgramPseudoHeader { - unsigned long long d0; - unsigned long long d1; - unsigned long long d2; - unsigned long long d3; -} PvtcpDgramPseudoHeader; - - -/* - * Flow control constants for pv/offload sockets. - * We are defining a receive size model: 1) small, 2) medium, 3)large. - * This seems sufficient in addressing most target environments, but more - * models may be defined. A smaller minimum model (1) cannot be defined. - * - * Short description of socket-level flow control. This applies to both - * dgram and stream sockets, in both directions. It follows that, with regard - * to 'comm' writes, dgram and stream writes are: a) lossless and b) ordered. - * - * 0. Both sides (offload, pv) of a socket maintain (almost) mirror values - * of input/output queue sizes. We say 'almost', because they're allowed - * to conservatively converge in time. - * 1. Senders never write out to the shmem channel, and destined to a socket - * (be it offload or pv), more bytes than that socket can hold/enqueue. - * This is based on socket fields storing information mentioned above. 
- * The upper limit is PVTCP_SOCK_RCVSIZE and cannot be exceeded under - * any circumstances. - * 2. There is a 'safe' limit value (per socket) which can be tested prior - * to writing one more max-sized packet to that socket. - * This value is PVTCP_SOCK_SAFE_RCVSIZE. - * 3. There is also a notion of 'large' acks, which controls the frequency of - * reporting socket queue size changes when bytes are consumed from it. - * When a sender is about to write out (to the channel, for a given socket) - * in excess of PVTCP_SOCK_LARGE_ACK_WM bytes, it sets, in the packet - * header flag field, the PVTCP_SOCK_LARGE_ACK_ORDER value. The other end - * updates its 'delta ack' value accordingly (1 << flag value). - * 4. As bytes are consumed (again, at either end), the operation or function, - * will send a size ack packet with the consumed size since the last ack, - * _iff_ that size is larger than, or equal to the 'delta ack' value. - * If an ack was sent, the 'delta ack' is decreased by half, to a minimum - * indicated by PVTCP_SOCK_SMALL_ACK_ORDER. - * Note that concurrently setting the 'delta ack' to its high value - * because of condition 3) above, is fine since the sender already has, - * or is about to put pressure on the socket. 
- */ - -#if !defined(PVTCP_SOCK_RCVSIZE_MODEL) - #define PVTCP_SOCK_RCVSIZE_MODEL 1 -#endif - -#if PVTCP_SOCK_RCVSIZE_MODEL == 1 - #define PVTCP_SOCK_LARGE_ACK_WM (64 << 10) /* 64K */ - #define PVTCP_SOCK_LARGE_ACK_ORDER 15 - #define PVTCP_SOCK_SMALL_ACK_ORDER 11 - #define PVTCP_SOCK_SAFE_RCVSIZE (128 << 10) /* 128K */ -#elif PVTCP_SOCK_RCVSIZE_MODEL == 2 - #define PVTCP_SOCK_LARGE_ACK_WM (128 << 10) /* 128K */ - #define PVTCP_SOCK_LARGE_ACK_ORDER 16 - #define PVTCP_SOCK_SMALL_ACK_ORDER 12 - #define PVTCP_SOCK_SAFE_RCVSIZE (256 << 10) /* 256K */ -#elif PVTCP_SOCK_RCVSIZE_MODEL == 3 - #define PVTCP_SOCK_LARGE_ACK_WM (128 << 10) /* 128K */ - #define PVTCP_SOCK_LARGE_ACK_ORDER 16 - #define PVTCP_SOCK_SMALL_ACK_ORDER 12 - #define PVTCP_SOCK_SAFE_RCVSIZE (512 << 10) /* 512K */ -#else - #error "Invalid PVTCP_SOCK_RCVSIZE_MODEL (one of 1, 2, 3)" -#endif - -#define PVTCP_SOCK_RCVSIZE \ - (PVTCP_SOCK_SAFE_RCVSIZE + \ - PVTCP_SOCK_BUF_SIZE + sizeof (PvtcpDgramPseudoHeader)) - - -/* - * Operation codes - */ - -enum PvtcpOpCodes { - PVTCP_OP_FLOW = 0, - PVTCP_OP_IO, - PVTCP_OP_CREATE, - PVTCP_OP_RELEASE, - PVTCP_OP_BIND, - PVTCP_OP_LISTEN, - PVTCP_OP_ACCEPT, - PVTCP_OP_CONNECT, - PVTCP_OP_SHUTDOWN, - PVTCP_OP_SETSOCKOPT, - PVTCP_OP_GETSOCKOPT, - PVTCP_OP_IOCTL, - PVTCP_OP_INVALID -}; - -#define PVTCP_FLOW_OP_INVALID_SIZE 0xffffffff - - -/* - * Operation functions - */ - -COMM_DEFINE_OP(PvtcpFlowOp); -COMM_DEFINE_OP(PvtcpIoOp); -COMM_DEFINE_OP(PvtcpCreateOp); -COMM_DEFINE_OP(PvtcpReleaseOp); -COMM_DEFINE_OP(PvtcpBindOp); -COMM_DEFINE_OP(PvtcpListenOp); -COMM_DEFINE_OP(PvtcpAcceptOp); -COMM_DEFINE_OP(PvtcpConnectOp); -COMM_DEFINE_OP(PvtcpShutdownOp); -COMM_DEFINE_OP(PvtcpSetSockOptOp); -COMM_DEFINE_OP(PvtcpGetSockOptOp); -COMM_DEFINE_OP(PvtcpIoctlOp); - - -/* - * Pvtcp/Comm type and supported versions. 
- */ - -#define PVTCP_COMM_IMPL_TYPE "com.vmware.comm.protocol.pvTCP@" - -#define PVTCP_COMM_IMPL_VERS_1_0 (PVTCP_COMM_IMPL_TYPE "1.0") -#define PVTCP_COMM_IMPL_VERS_1_1 (PVTCP_COMM_IMPL_TYPE "1.1") - -typedef enum { - PVTCP_VERS_1_0 = 0, - PVTCP_VERS_1_1 -} PvtcpVersion; - -extern const char *pvtcpVersions[]; -extern const unsigned int pvtcpVersionsSize; - - -/* - * State interface markers - */ - -#define PVTCP_PF_UNBOUND 0x0 -#define PVTCP_PF_DEATH_ROW 0xffffffff -#define PVTCP_PF_LOOPBACK_INET4 (PVTCP_PF_DEATH_ROW - 1) - - -/* - * Interface and interface configuration structures. - */ - -typedef struct PvtcpIfConf { - int family; // Values: - // unbound (PVTCP_PF_UNBOUND) - // deathRow (PVTCP_PF_DEATH_ROW) - // loopback (PVTCP_PF_LOOPBACK_INET4) - // inet4 (PF_INET) - // inet6 (PF_INET6) - union { - struct in_addr in; - struct in6_addr in6; - } addr; // inet4 or inet6 address. - union { - struct in_addr in; - struct in6_addr in6; - } mask; // inet4 or inet6 netmask. -} PvtcpIfConf; - - -struct PvtcpState; - -typedef struct PvtcpIf { - CommOSList sockList; // List of sockets. - CommOSList stateLink; // Link in PvtcpState.ifList. - struct PvtcpState *state; // Back reference to state. - PvtcpIfConf conf; // Interface configuration. -} PvtcpIf; - - -/* - * General pvtcp state associated with a channel. - */ - -typedef struct PvtcpState { - unsigned long long id; // Randomly generated state ID. - CommOSList ifList; // List of active interfaces. - CommChannel channel; // Comm channel back reference. - PvtcpIf ifDeathRow; // Always-present netif. - PvtcpIf ifUnbound; // Ditto. - PvtcpIf ifLoopbackInet4; // Ditto. - void *namespace; // Name space, where supported. - void *extra; // Used by upper layer to extend state as needed. - unsigned int mask; // Mask used to obfuscate socket pointers. -} PvtcpState; - - -/* - * Define pvtcp socket common fields and include the pv or offload header - * to get the right PvtcpSock definition. 
- */ - -#define PVTCP_SOCK_COMMON_FIELDS \ - CommOSMutex inLock; /* Input lock. */ \ - CommOSMutex outLock; /* Output lock. */ \ - CommOSSpinlock stateLock; /* State update lock. */ \ - CommOSList ifLink; /* Link in PvtcpIf.sockList. */ \ - CommOSWork work; /* Work item for AIO processing. */ \ - PvtcpIf *netif; /* Netif reference. */ \ - PvtcpState *state; /* State reference. */ \ - unsigned long long stateID; /* State ID. */ \ - CommChannel channel; /* Comm channel reference. */ \ - unsigned long long peerSock; /* Peer socket, opaque. */ \ - volatile int peerSockSet; /* Peer socket valid. */ \ - CommOSAtomic deltaAckSize; /* Recv size updates required by peer. */ \ - CommOSAtomic rcvdSize; /* Bytes received since last ack. */ \ - CommOSAtomic sentSize; /* Bytes sent; also updated by peer. */ \ - CommOSAtomic queueSize; /* Queue size. */ \ - CommOSList queue; /* Send queue (off) or recv queue (pv). */ \ - void *rpcReply; /* RPC reply. */ \ - int rpcStatus; /* RPC completion status. */ \ - int err /* Socket error. 
*/ - -#define PVTCP_PEER_SOCK_NULL ((unsigned long long)0) - - -/* - * Helper macros - */ - -#define SOCK_STATE_LOCK(pvsk) CommOS_SpinLock(&(pvsk)->stateLock) -#define SOCK_STATE_UNLOCK(pvsk) CommOS_SpinUnlock(&(pvsk)->stateLock) - -#define SOCK_IN_TRYLOCK(pvsk) CommOS_MutexTrylock(&(pvsk)->inLock) -#define SOCK_IN_LOCK(pvsk) CommOS_MutexLock(&(pvsk)->inLock) -#define SOCK_IN_UNLOCK(pvsk) CommOS_MutexUnlock(&(pvsk)->inLock) - -#define SOCK_OUT_TRYLOCK(pvsk) CommOS_MutexTrylock(&(pvsk)->outLock) -#define SOCK_OUT_LOCK(pvsk) CommOS_MutexLock(&(pvsk)->outLock) -#define SOCK_OUT_LOCK_UNINT(pvsk) \ - CommOS_MutexLockUninterruptible(&(pvsk)->outLock) -#define SOCK_OUT_UNLOCK(pvsk) CommOS_MutexUnlock(&(pvsk)->outLock) - -#define PVTCP_UNLOCK_DISP_DISCARD_VEC() \ - CommSvc_DispatchUnlock(channel); \ - while (vecLen) { \ - PvtcpBufFree(vec[--vecLen].iov_base); \ - } - - -#if defined(PVTCP_BUILDING_SERVER) -#include "pvtcp_off.h" -#else -#include "pvtcp_pv.h" -#endif // defined(PVTCP_BUILDING_SERVER) - - -/* - * Data declarations - */ - -extern const PvtcpIfConf *pvtcpIfUnbound; -extern const PvtcpIfConf *pvtcpIfDeathRow; -extern const PvtcpIfConf *pvtcpIfLoopbackInet4; - -extern CommImpl pvtcpImpl; -extern CommOperationFunc pvtcpOperations[]; - -extern CommChannel pvtcpClientChannel; - - -/* - * Common state manipulation functions. - */ - -void *PvtcpStateAlloc(CommChannel channel); -void PvtcpStateFree(void *arg); - -int PvtcpStateAddIf(CommChannel channel, const PvtcpIfConf *conf); -void PvtcpStateRemoveIf(CommChannel channel, const PvtcpIfConf *conf); -PvtcpIf *PvtcpStateFindIf(PvtcpState *state, const PvtcpIfConf *conf); - -int -PvtcpStateAddSocket(CommChannel channel, - const PvtcpIfConf *conf, - PvtcpSock *sock); -int PvtcpStateRemoveSocket(CommChannel channel, PvtcpSock *sock); - - -/* - * Common Pvtcp functions. 
- */ - -int PvtcpCheckArgs(CommTranspInitArgs *transpArgs); - -void -PvtcpCloseNtf(void *ntfData, - const CommTranspInitArgs *transpArgs, - int inBH); - -void *PvtcpBufAlloc(unsigned int size); -void PvtcpBufFree(void *buf); - -void PvtcpReleaseSocket(PvtcpSock *pvsk); -int PvtcpSockInit(PvtcpSock *pvsk, CommChannel channel); - -void PvtcpProcessAIO(CommOSWork *work); - - -/** - * @brief Packs an IPV6 address stored in an array of four 32-bit elements, - * into two 64-bit variables. - * @param addr IPV6 address as an array of 32-bit elements. - * @param[out] d64_0 pointer to 64-bit variable. - * @param[out] d64_1 pointer to 64-bit variable. - */ - -static inline void -PvtcpI6AddrPack(const unsigned int addr[4], - unsigned long long *d64_0, - unsigned long long *d64_1) -{ - *d64_0 = *(unsigned long long *)&addr[0]; - *d64_1 = *(unsigned long long *)&addr[2]; -} - - -/** - * @brief Unpacks two 64-bit values into an IPV6 address-storing array of - * four 32-bit elements, - * @param[out] addr IPV6 address as an array of 32-bit elements. - * @param d64_0 64-bit value. - * @param d64_1 64-bit value. - */ - -static inline void -PvtcpI6AddrUnpack(unsigned int addr[4], - unsigned long long d64_0, - unsigned long long d64_1) -{ - *(unsigned long long *)&addr[0] = d64_0; - *(unsigned long long *)&addr[2] = d64_1; -} - - -/** - * @brief Verifies whether the argument is a valid socket. If yes, it returns - * the actual pointer. Otherwise, it returns from the calling function. - * WARNING: This macro must ONLY be used in operation functions, as its - * implementation assumes. - * @param handle socket handle to verify. - * @param container state supposed to contain the socket handle. - * @return 32-bit or 64-bit PvtcpSock*, depending on __LP64__ or __LLP64__. 
/*
 * PvtcpGetPvskOrReturn(handle, container)
 *
 * Recovers a PvtcpSock pointer from a peer-supplied handle by XOR-ing the
 * handle with the state's mask, then checks that the socket's stateID
 * matches the state's id. On mismatch it expands
 * PVTCP_UNLOCK_DISP_DISCARD_VEC(), calls CommSvc_Zombify() on the state's
 * channel and executes a bare 'return;' in the *calling* function.
 *
 * Consequences visible from the expansion:
 *   - it is a GNU statement expression, so it needs a GCC-compatible compiler;
 *   - the bare 'return;' means the caller must return void;
 *   - PVTCP_UNLOCK_DISP_DISCARD_VEC() refers to 'channel', 'vec' and 'vecLen',
 *     which must therefore be in scope at the expansion site;
 *   - the recovered pointer is dereferenced before it is validated.
 *
 * The two variants differ only in how the handle is converted to a pointer:
 * the 64-bit build XORs the full 64-bit handle, the 32-bit build first
 * truncates the handle to unsigned int.
 */

#if defined(__LP64__) || defined(__LLP64__)

#define PvtcpGetPvskOrReturn(handle, container)                         \
   ({                                                                   \
      PvtcpState *__state = (PvtcpState *)(container);                  \
      PvtcpSock *__pvsk =                                               \
         (PvtcpSock *)((handle) ^ (unsigned long long)__state->mask);   \
                                                                        \
      if (__pvsk->stateID != __state->id) {                             \
         PVTCP_UNLOCK_DISP_DISCARD_VEC();                               \
         CommSvc_Zombify(__state->channel, 0);                          \
         return;                                                        \
      }                                                                 \
      (__pvsk);                                                         \
   })

#else // __LP64__ || __LLP64__

#define PvtcpGetPvskOrReturn(handle, container)                         \
   ({                                                                   \
      PvtcpState *__state = (PvtcpState *)(container);                  \
      PvtcpSock *__pvsk =                                               \
         (PvtcpSock *)((unsigned int)(handle) ^ __state->mask);         \
                                                                        \
      if (__pvsk->stateID != __state->id) {                             \
         PVTCP_UNLOCK_DISP_DISCARD_VEC();                               \
         CommSvc_Zombify(__state->channel, 0);                          \
         return;                                                        \
      }                                                                 \
      (__pvsk);                                                         \
   })

#endif // __LP64__ || __LLP64__


/**
 *  @brief Masks a socket pointer to be passed to the peer module.
 *      This is the inverse of the XOR performed by PvtcpGetPvskOrReturn():
 *      the pointer value is XOR-ed with the owning state's mask.
 *  @param pvsk socket pointer to mask.
 *  @return 64-bit pvtcp socket handle (the 32-bit build yields an
 *      unsigned int value).
 */

#if defined(__LP64__) || defined(__LLP64__)

#define PvtcpGetHandle(pvsk)                                                  \
   ((unsigned long long)(pvsk) ^ (unsigned long long)(pvsk)->state->mask)

#else // __LP64__ || __LLP64__

#define PvtcpGetHandle(pvsk)                                                  \
   ((unsigned int)(pvsk) ^ (pvsk)->state->mask)

#endif // __LP64__ || __LLP64__

#endif // _PVTCP_H_
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Server (offload) side code. - */ - -#include "pvtcp.h" - -/** - * @brief Allocates the net buffer. - * @param size buffer size - * @return address of buffer or NULL - */ -void * -PvtcpBufAlloc(unsigned int size) -{ - PvtcpOffBuf *buf; - - /* coverity[alloc_fn] */ - /* coverity[var_assign] */ - buf = CommOS_Kmalloc(size + sizeof *buf - sizeof buf->data); - if (buf) { - CommOS_ListInit(&buf->link); - buf->len = (unsigned short)size; - buf->off = 0; - return PvtcpOffBufFromInternal(buf); - } - return NULL; -} - - -/** - * @brief Deallocates given net buffer. - * @param buf buffer to deallocate - * @sideeffect Frees memory - */ - -void -PvtcpBufFree(void *buf) -{ - CommOS_Kfree(PvtcpOffInternalFromBuf(buf)); -} - - -/** - * @brief Initializes the Pvtcp socket offload common fields. - * @param pvsk pvtcp socket. - * @param channel Comm channel this socket is associated with. - * @return 0 if successful, -1 otherwise. - */ - -int -PvtcpOffSockInit(PvtcpSock *pvsk, - CommChannel channel) -{ - int rc = PvtcpSockInit(pvsk, channel); - - pvsk->opFlags = 0; - pvsk->flags = 0; - return rc; -} diff --git a/arch/arm/mvp/pvtcpkm/pvtcp_off.h b/arch/arm/mvp/pvtcpkm/pvtcp_off.h deleted file mode 100644 index f183968..0000000 --- a/arch/arm/mvp/pvtcpkm/pvtcp_off.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Offload common definitions. - * This file is meant to only be included via pvtcp.h. - */ - -#ifndef _PVTCP_OFF_H_ -#define _PVTCP_OFF_H_ - - -#define PVTCP_OFF_SOCK_COMMON_FIELDS \ - volatile unsigned int opFlags; /* Saves op codes as bit mask. */ \ - volatile unsigned int flags /* General purpose flags. */ - - -/* General purpose socket flags */ - -enum PvtcpOffPvskFlags { - PVTCP_OFF_PVSKF_IPV6_LOOP = 0, /* Used for IPV6 loopback morphing/reset. */ - PVTCP_OFF_PVSKF_SHUT_RD, /* Set to initiate socket recv shutdown. */ - PVTCP_OFF_PVSKF_SHUT_WR, /* Set to initiate socket send shutdown. */ - PVTCP_OFF_PVSKF_TCP_NODELAY, /* Caches the TCP_NODELAY socket option. */ - PVTCP_OFF_PVSKF_TCP_CORK, /* Caches the TCP_CORK socket option. */ - PVTCP_OFF_PVSKF_DISCONNECT, /* Set do indicate connect()/AF_UNSPEC. */ - PVTCP_OFF_PVSKF_INVALID = 32 -}; - - -/* - * Include OS-dependent PvtcpSock structure and functions. - */ - -#if defined(__linux__) -#include "pvtcp_off_linux.h" -#else -#error "Unsupported OS." -#endif - - -/* - * Offload packet payload data structure. - */ - -typedef struct PvtcpOffBuf { - CommOSList link; // Link in socket queue. 
- unsigned short len; - unsigned short off; - char data[1]; -} PvtcpOffBuf; - - -/** - * @brief Returns net buffer given private data structure pointer and based - * on the internal offset pointer - * @param arg pointer to PvtcpOffBuf wrapper structure - * @return address of buffer or NULL - */ - -static inline void * -PvtcpOffBufFromInternalOff(PvtcpOffBuf *arg) -{ - return arg ? - &arg->data[arg->off] : - NULL; -} - - -/** - * @brief Returns net buffer given private data structure pointer - * @param arg pointer to PvtcpOffBuf wrapper structure - * @return address of buffer or NULL - */ - -static inline void * -PvtcpOffBufFromInternal(PvtcpOffBuf *arg) -{ - return arg ? - &arg->data[0] : - NULL; -} - - -/** - * @brief Returns internal data structure given net buffer pointer - * @param arg pointer to PvtcpOffBuf wrapper structure - * @return address of internal data structure or NULL - */ - -static inline PvtcpOffBuf * -PvtcpOffInternalFromBuf(void *arg) -{ - return arg ? - (PvtcpOffBuf *)((char *)arg - offsetof(PvtcpOffBuf, data)) : - NULL; -} - - -/** - * @brief Tests operation flag for AIO processing. - * @param pvsk socket to test operation on. - * @param op operation to test if set. - * @return non-zero if operation set, zero otherwise. - * @sideeffect socket processing by AIO threads affected according to operation. - */ - -static inline int -PvskTestOpFlag(struct PvtcpSock *pvsk, - int op) -{ - return pvsk->opFlags & (1 << op); -} - - -/** - * @brief Sets operation flag for AIO processing; acquires the state lock. - * @param[in,out] pvsk socket to set operation on. - * @param op operation to set. - * @sideeffect socket processing by AIO threads affected according to operation. 
- */ - -static inline void -PvskSetOpFlag(struct PvtcpSock *pvsk, - int op) -{ - unsigned int ops; - - SOCK_STATE_LOCK(pvsk); - ops = pvsk->opFlags | (1 << op); - pvsk->opFlags = ops; - SOCK_STATE_UNLOCK(pvsk); -} - - -/** - * @brief Resets operation flag for AIO processing; acquires the state lock. - * @param[in,out] pvsk socket to reset operation on. - * @param op operation to reset. - * @sideeffect socket processing by AIO threads affected according to operation. - */ - -static inline void -PvskResetOpFlag(struct PvtcpSock *pvsk, - int op) -{ - unsigned int ops; - - SOCK_STATE_LOCK(pvsk); - ops = pvsk->opFlags & ~(1 << op); - pvsk->opFlags = ops; - SOCK_STATE_UNLOCK(pvsk); -} - - -/** - * @brief Tests general purpose socket flags. - * @param pvsk socket. - * @param flag flag to test. - * @return non-zero if flag set, zero otherwise. - */ - -static inline int -PvskTestFlag(struct PvtcpSock *pvsk, - int flag) -{ - return (flag < PVTCP_OFF_PVSKF_INVALID) && (pvsk->flags & (1 << flag)); -} - - -/** - * @brief Sets general purpose socket flags; acquires the state lock. - * @param[in,out] pvsk socket. - * @param flag flag to set or clear. - * @param onOff whether to set or clear the flag. - */ - -static inline void -PvskSetFlag(struct PvtcpSock *pvsk, - int flag, - int onOff) -{ - unsigned int flags; - - SOCK_STATE_LOCK(pvsk); - if (flag < PVTCP_OFF_PVSKF_INVALID) { - if (onOff) { - flags = pvsk->flags | (1 << flag); - } else { - flags = pvsk->flags & ~(1 << flag); - } - pvsk->flags = flags; - } - SOCK_STATE_UNLOCK(pvsk); -} - - -int PvtcpOffSockInit(PvtcpSock *pvsk, CommChannel channel); - -#endif // _PVTCP_OFF_H_ diff --git a/arch/arm/mvp/pvtcpkm/pvtcp_off_io_linux.c b/arch/arm/mvp/pvtcpkm/pvtcp_off_io_linux.c deleted file mode 100644 index 9958c39..0000000 --- a/arch/arm/mvp/pvtcpkm/pvtcp_off_io_linux.c +++ /dev/null @@ -1,831 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. 
All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Server (offload) side Linux-specific socket I/O functions. - */ - -#include "pvtcp.h" - -/* - * Data. - */ - -/* Used to check if OutputAIO()-ing is likely in progress. */ - -CommOSAtomic PvtcpOutputAIOSection; - - -/* - * Large datagram bounce buffer (PVTCP_SOCK_BUF_SIZE < size <= 64K). - * Only one such buffer is available, shared across cpus via get/put. - * A preallocated, smaller buffer is used for most over-size 'allocs'. - * A larger, 64K-buffer may need to be __vmalloc()-ed. - */ - -typedef struct LargeDgramBuf { - unsigned char buf[PVTCP_SOCK_BUF_SIZE << 1]; /* Fast buffer. */ - void *spareBuf; /* Dynamically allocated. */ - CommOSMutex lock; -} LargeDgramBuf; - -static LargeDgramBuf largeDgramBuf; - - -/** - * @brief One time initialization of large datagram buffer. - */ - -void -PvtcpOffLargeDgramBufInit(void) -{ - largeDgramBuf.spareBuf = NULL; - CommOS_MutexInit(&largeDgramBuf.lock); -} - - -/** - * @brief Reserves/holds the large datagram buffer. - * @param size size of buffer. - * @sizeeffect may sleep until the buffer is available. - * @return address of buffer, or NULL if size too large or allocation failed. 
- */ - -static inline void * -LargeDgramBufGet(int size) -{ - static const unsigned int maxSize = 64 * 1024; - - /* coverity[alloc_fn] */ - /* coverity[var_assign] */ - - CommOS_MutexLockUninterruptible(&largeDgramBuf.lock); - - if (size <= sizeof largeDgramBuf.buf) { - return largeDgramBuf.buf; - } - - if (size <= maxSize) { - if (!largeDgramBuf.spareBuf) { - largeDgramBuf.spareBuf = __vmalloc(maxSize, - (GFP_ATOMIC | __GFP_HIGHMEM), - PAGE_KERNEL); - } - if (largeDgramBuf.spareBuf) { - return largeDgramBuf.spareBuf; - } - } - - CommOS_MutexUnlock(&largeDgramBuf.lock); - return NULL; -} - - -/** - * @brief Releases hold on the large datagram buffer. - * @param buf buffer to put back. - */ - -static inline void -LargeDgramBufPut(void *buf) -{ - static unsigned int spareBufPuts = 0; - - BUG_ON((buf != largeDgramBuf.buf) && (buf != largeDgramBuf.spareBuf)); - - if (largeDgramBuf.spareBuf && (++spareBufPuts % 2) == 0) { - /* Deallocate the spare buffer every now and then. */ - - vfree(largeDgramBuf.spareBuf); - largeDgramBuf.spareBuf = NULL; - } - - CommOS_MutexUnlock(&largeDgramBuf.lock); -} - - -/* - * I/O offload operations. - */ - -/** - * @brief Flow control notification received when more (enough) data was - * consumed from a PV socket. - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled - */ - -void -PvtcpFlowOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - - PvtcpHoldSock(pvsk); - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - CommOS_SubReturnAtomic(&pvsk->rcvdSize, (int)packet->data32); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Outputs bytes to socket. 
- * @param channel communication channel with offloader. - * @param upperLayerState state associated with this channel. - * @param packet received packet header. - * @param vec payload buffer descriptors. - * @param vecLen payload buffer descriptor count. - * @sideeffect Changes send size/capacity ratio. May schedule AIO processing - * for enqueued bytes, if applicable. - */ - -void -PvtcpIoOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - int rc; - unsigned int vecOff; - PvtcpOffBuf *internalBuf; - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - unsigned int dataLen = packet->len - sizeof *packet; - struct msghdr msg = { - .msg_controllen = 0, - .msg_control = NULL - }; - int tmpSize; - int needSched = 0; - - PvtcpHoldSock(pvsk); - rc = 0; - - if (!pvsk->peerSockSet || PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_WR)) { - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - goto out; - } - - tmpSize = (int)COMM_OPF_GET_VAL(packet->flags); - if (tmpSize) { - /* It was requested that we update deltaAckSize. */ - - tmpSize = 1 << tmpSize; - CommOS_WriteAtomic(&pvsk->deltaAckSize, tmpSize); - } - - if (sk->sk_type == SOCK_STREAM) { - unsigned int queueSize = 0; - - if (!SOCK_OUT_TRYLOCK(pvsk)) { - if (pvsk->peerSockSet && - (sk->sk_state == TCP_ESTABLISHED) && - (CommOS_ReadAtomic(&pvsk->queueSize) == 0)) { - /* Attempt to write directly as many bytes as we can. */ - - msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; - rc = kernel_sendmsg(sock, &msg, vec, vecLen, dataLen); - - if (rc == -EAGAIN) { - rc = 0; - } - if (rc >= 0) { - dataLen = rc; - for (vecOff = 0; vecOff < vecLen; vecOff++) { - if (rc >= vec[vecOff].iov_len) { - /* Dispose of all fully consumed buffers. */ - - PvtcpBufFree(vec[vecOff].iov_base); - rc -= vec[vecOff].iov_len; - } else { - /* Place partly consumed / unconsumed buffers in queue. 
*/ - - internalBuf = - PvtcpOffInternalFromBuf(vec[vecOff].iov_base); - BUG_ON(internalBuf == NULL); - if (rc > 0) { - internalBuf->len -= rc; - internalBuf->off += rc; - rc = 0; - } - CommOS_ListAddTail(&pvsk->queue, &internalBuf->link); - queueSize += internalBuf->len; - } - } - if (queueSize > 0) { - CommOS_AddReturnAtomic(&pvsk->queueSize, queueSize); - needSched = 1; - } - } else { - /* - * We never close offload sockets unless told by the PV side, - * or when the comm goes down. Getting out of sync with PV - * sockets is a dangerously bad idea. - * This is very likely an EPIPE/ECONNRESET. - */ - - dataLen = 0; - for ( vecOff = 0; vecOff < vecLen; vecOff++) { - PvtcpBufFree(vec[vecOff].iov_base); - } - } - SOCK_OUT_UNLOCK(pvsk); - } else { - SOCK_OUT_UNLOCK(pvsk); - goto enqueueBytes; - } - } else { - /* - * We enqueue the bytes for aio processing. Note that request - * level ordering is preserved since we're still under the dispatch - * lock. However, accessing 'queue' must be protected via - * the state lock to serialize with aio changes. - * Note that the struct socket *sock may have been released, but here - * we only access sk which is held (albeit potentially orphaned). 
- */ - - CommOSList bufList; - -enqueueBytes: - dataLen = 0; - if (pvsk->peerSockSet && (sk->sk_state == TCP_ESTABLISHED)) { - queueSize = 0; - CommOS_ListInit(&bufList); - for (vecOff = 0; vecOff < vecLen; vecOff++) { - internalBuf = PvtcpOffInternalFromBuf(vec[vecOff].iov_base); - BUG_ON(internalBuf == NULL); - CommOS_ListAddTail(&bufList, &internalBuf->link); - queueSize += internalBuf->len; - } - - if (queueSize > 0) { - SOCK_STATE_LOCK(pvsk); - CommOS_ListSpliceTail(&pvsk->queue, &bufList); - SOCK_STATE_UNLOCK(pvsk); - CommOS_AddReturnAtomic(&pvsk->queueSize, queueSize); - needSched = 1; - } - } else { - for ( vecOff = 0; vecOff < vecLen; vecOff++) { - PvtcpBufFree(vec[vecOff].iov_base); - } - } - } - } else { /* SOCK_DGRAM || SOCK_RAW */ - struct sockaddr *addr; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - int addrLen; - - /* - * Non-stream sockets don't use the send queue, packets are sent - * directly and they must _not_ be merged. - */ - - if (sk->sk_family == AF_INET) { - sin.sin_family = AF_INET; - sin.sin_port = packet->data16; - addr = (struct sockaddr *)&sin; - addrLen = sizeof sin; - sin.sin_addr.s_addr = (unsigned int)packet->data64ex; - PvtcpTestAndBindLoopbackInet4(pvsk, &sin.sin_addr.s_addr, 0); - } else { /* AF_INET6 */ - sin6.sin6_family = AF_INET6; - sin6.sin6_port = packet->data16; - addr = (struct sockaddr *)&sin6; - addrLen = sizeof sin6; - PvtcpTestAndBindLoopbackInet6(pvsk, &packet->data64ex, - &packet->data64ex2, 0); - PvtcpI6AddrUnpack(&sin6.sin6_addr.s6_addr32[0], - packet->data64ex, packet->data64ex2); - } - msg.msg_flags = packet->data32 | MSG_DONTWAIT | MSG_NOSIGNAL; - msg.msg_name = addr; - msg.msg_namelen = addrLen; - - if (pvsk->peerSockSet) { - /* - * Flow-control already done, based on PVTCP_SOCK_SAFE_RCVSIZE, just - * as with stream sockets. Meaning that we block the senders in the - * guest (if applicable). 
- * - * The send buffer size was set high enough, at socket creation time, - * to avoid dropping datagrams during the (non-blocking) write. - */ - - if (vecLen == 0) { - /* - * Allow zero-sized datagram sending. - */ - - struct kvec dummy = { .iov_base = NULL, .iov_len = 0 }; - - rc = kernel_sendmsg(sock, &msg, &dummy, 0, 0); - if (rc != dummy.iov_len) { -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Dgram [0x%p] sent [%d], expected [%d]\n", - __FUNCTION__, sk, rc, dummy.iov_len)); -#endif - if (rc == -EAGAIN) { /* As if lost on the wire. */ - rc = 0; - } - } - } - - for (vecOff = 0; vecOff < vecLen; vecOff++) { - rc = kernel_sendmsg(sock, &msg, &vec[vecOff], 1, - vec[vecOff].iov_len); - PvtcpBufFree(vec[vecOff].iov_base); - if (rc != vec[vecOff].iov_len) { -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Dgram [0x%p] sent [%d], expected [%d]\n", - __FUNCTION__, sk, rc, vec[vecOff].iov_len)); -#endif - if (rc == -EAGAIN) { /* As if lost on the wire. */ - rc = 0; - } - } - } - - if (COMM_OPF_TEST_ERR(packet->flags)) { - /* PV client wants an automatic bind. */ - - PvskSetOpFlag(pvsk, PVTCP_OP_BIND); - PvtcpSchedSock(pvsk); - } - } else { - for ( vecOff = 0; vecOff < vecLen; vecOff++) { - PvtcpBufFree(vec[vecOff].iov_base); - } - } - } - CommSvc_DispatchUnlock(channel); - -out: - if (rc < 0) { - pvsk->err = -rc; - } - tmpSize = CommOS_AddReturnAtomic(&pvsk->sentSize, dataLen); - if ((tmpSize >= CommOS_ReadAtomic(&pvsk->deltaAckSize)) || - pvsk->err || needSched) { - if (CommOS_AddReturnAtomic(&PvtcpOutputAIOSection, 1) == 1) { - /* OutputAIO() (likely) not running. */ - - PvtcpSchedSock(pvsk); - } - CommOS_SubReturnAtomic(&PvtcpOutputAIOSection, 1); - } - - PvtcpPutSock(pvsk); -} - - -/* - * AI/O functions called from the main AIO processing function. - */ - -/** - * @brief Processes socket flow control acks and error notifications in an - * AIO thread. This function is called with the socket 'in' lock taken. - * @param[in,out] pvsk socket to process. 
/**
 *  @brief Processes socket flow control acks and error notifications in an
 *      AIO thread. This function is called with the socket 'in' lock taken.
 *
 *      Builds a PVTCP_OP_FLOW packet that carries either an error code (in
 *      data32ex, with the error flag set) or the number of bytes sent since
 *      the last ack (in data32), and writes it to the channel if there is
 *      something to report and the peer socket is known.
 *  @param[in,out] pvsk socket to process.
 *  @param err non-zero if offload was closed, zero otherwise.
 *  @sideeffect May resume PV socket sending or raise errors.
 */

void
PvtcpFlowAIO(PvtcpSock *pvsk,
             int err)
{
   CommPacket packet = { .flags = 0 };
   unsigned long long timeout;
   int tmpSize;

   COMM_OPF_CLEAR_ERR(packet.flags);
   packet.data32 = PVTCP_FLOW_OP_INVALID_SIZE; /* "Nothing to ack" marker. */
   if (pvsk->err || err) {
      /*
       * Error report: a pending socket error takes precedence (and is
       * cleared atomically by xchg); otherwise the negated 'err' is sent.
       */
      COMM_OPF_SET_ERR(packet.flags);
      packet.data32ex = !pvsk->err ? 0 : xchg(&pvsk->err, 0);
      if (!packet.data32ex) {
         packet.data32ex = -err;
      }
#if defined(PVTCP_FULL_DEBUG)
      CommOS_Debug(("%s: Sending socket error [%u] on [0x%p -> 0x%0x].\n",
                    __FUNCTION__, packet.data32ex, pvsk,
                    (unsigned)(pvsk->peerSock)));
#endif
   } else {
      /* Ack path: only report once sentSize has reached deltaAckSize. */
      SOCK_STATE_LOCK(pvsk);
      tmpSize = CommOS_ReadAtomic(&pvsk->deltaAckSize);
      if (CommOS_ReadAtomic(&pvsk->sentSize) >= tmpSize) {
         if ((SkFromPvsk(pvsk)->sk_type != SOCK_STREAM) &&
             !sock_writeable(SkFromPvsk(pvsk))) {
            /* Don't send dgram flow op until WriteSpaceCB tells us to do so. */

            packet.data32 = PVTCP_FLOW_OP_INVALID_SIZE;
         } else {
            /* Report and reset the sent counter. */
            packet.data32 = CommOS_ReadAtomic(&pvsk->sentSize);
            CommOS_WriteAtomic(&pvsk->sentSize, 0);
            /* Halve the ack threshold while it is above the small-ack floor. */
            if (tmpSize > (1 << (PVTCP_SOCK_SMALL_ACK_ORDER + 1))) {
               tmpSize >>= 1;
               CommOS_WriteAtomic(&pvsk->deltaAckSize, tmpSize);
            }
         }
      }
      SOCK_STATE_UNLOCK(pvsk);
      packet.data32ex = 0;
   }

   /* Send only if there is an ack size or an error, and a peer to send to. */
   if (((packet.data32 != PVTCP_FLOW_OP_INVALID_SIZE) ||
        COMM_OPF_TEST_ERR(packet.flags)) &&
       pvsk->peerSockSet) {
      packet.len = sizeof packet;
      packet.opCode = PVTCP_OP_FLOW;
      packet.data64 = pvsk->peerSock;
      timeout = COMM_MAX_TO;
      CommSvc_Write(pvsk->channel, &packet, &timeout);
   }
}
- */ - -void -PvtcpOutputAIO(PvtcpSock *pvsk) -{ - struct sock *sk; - struct socket *sock; - PvtcpOffBuf *internalBuf; - PvtcpOffBuf *tmp; - CommOSList queue; -#define VEC_SIZE 32 - struct kvec vec[VEC_SIZE]; - unsigned int vecLen; - unsigned int dataLen; - struct msghdr msg = { - .msg_controllen = 0, - .msg_control = NULL, - .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL - }; - int queueDelta = 0; - int done = 0; - int rc; - - sk = SkFromPvsk(pvsk); - if (!sk) { - /* This is an error socket, we don't process it. */ - - return; - } - - sock = sk->sk_socket; - -again: - CommOS_AddReturnAtomic(&PvtcpOutputAIOSection, 1); - while (!done && CommOS_ReadAtomic(&pvsk->queueSize) > 0) { - /* Note: only stream sockets can have a positive send queue size. - * Similar to PvtcpIoOp: we must check if sock (struct socket *) is - * still valid. - */ - - /* Take the current queue private. */ - - SOCK_STATE_LOCK(pvsk); - queue = pvsk->queue; - if (CommOS_ListEmpty(&queue)) { - SOCK_STATE_UNLOCK(pvsk); - return; - } - queue.next->prev = &queue; - queue.prev->next = &queue; - CommOS_ListInit(&pvsk->queue); - SOCK_STATE_UNLOCK(pvsk); - - vecLen = 0; - dataLen = 0; - - if (sk->sk_state == TCP_ESTABLISHED) { - CommOS_ListForEach(&queue, internalBuf, link) { - if (vecLen == VEC_SIZE) { - break; - } - vec[vecLen].iov_base = PvtcpOffBufFromInternalOff(internalBuf); - vec[vecLen].iov_len = internalBuf->len; - dataLen += internalBuf->len; - vecLen++; - } - - rc = kernel_sendmsg(sock, &msg, vec, vecLen, dataLen); - - if (rc == -EAGAIN) { - rc = 0; - } - if (rc >= 0) { - /* If we wrote anything, dispose of the buffers in question. 
*/ - - queueDelta = rc; - if (queueDelta > 0) { - CommOS_ListForEachSafe(&queue, internalBuf, tmp, link) { - if (rc >= internalBuf->len) { - rc -= internalBuf->len; - CommOS_ListDel(&internalBuf->link); - PvtcpBufFree(PvtcpOffBufFromInternal(internalBuf)); - } else { - internalBuf->len -= rc; - internalBuf->off += rc; - break; - } - } - } - if (!CommOS_ListEmpty(&queue)) { - /* Add the remaining bytes to the beginning of the queue. */ - - SOCK_STATE_LOCK(pvsk); - CommOS_ListSplice(&pvsk->queue, &queue); - SOCK_STATE_UNLOCK(pvsk); - } - if (queueDelta == 0) { - /* Bail out if no bytes written, WriteSpaceCB() will resched. */ - - done = 1; - break; - } - CommOS_AddReturnAtomic(&pvsk->sentSize, queueDelta); - CommOS_SubReturnAtomic(&pvsk->queueSize, queueDelta); - } else { - /* - * Very likely, this is due to the socket being closed, so fine. - */ - - goto discardOutput; - } - } else { - /* Dispose of all buffers in the queue and mark it empty. */ - -discardOutput: - if (!CommOS_ListEmpty(&queue)) { - CommOS_ListForEachSafe(&queue, internalBuf, tmp, link) { - CommOS_ListDel(&internalBuf->link); - PvtcpBufFree(PvtcpOffBufFromInternal(internalBuf)); - } - } - CommOS_WriteAtomic(&pvsk->queueSize, 0); - break; - } - } - if (CommOS_SubReturnAtomic(&PvtcpOutputAIOSection, 1) > 0) { - if (!done) { - goto again; - } - } - - if (PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_WR)) { - kernel_sock_shutdown(sock, SHUT_WR); - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_WR, 0); - } -#undef VEC_SIZE -} - - -/** - * @brief Processes socket input in an AIO thread. This function is - * called with the socket 'in' lock taken. - * @param[in,out] pvsk socket to process. - * @param[in,out] perCpuBuf per-cpu socket read buffer. - * @return zero if eof was not detected, non-zero otherwise. - * @sideeffect Changes receive size/capacity ratio. 
/**
 *  @brief Processes socket input in an AIO thread. This function is
 *      called with the socket 'in' lock taken.
 *
 *      Listening sockets: reports the current accept backlog to the peer.
 *      All other sockets: reads from the socket without blocking and forwards
 *      each chunk/datagram to the peer as a PVTCP_OP_IO packet, until the
 *      receive window (rcvdSize) is exhausted, the socket has nothing more,
 *      or enough bytes were forwarded in this pass.
 *  @param[in,out] pvsk socket to process.
 *  @param[in,out] perCpuBuf per-cpu socket read buffer.
 *  @return zero if eof was not detected, non-zero otherwise (-1 when the
 *      socket has no struct sock or no read buffer was supplied,
 *      -ECONNRESET when a stream read returned zero bytes).
 *  @sideeffect Changes receive size/capacity ratio.
 */

int
PvtcpInputAIO(PvtcpSock *pvsk,
              void *perCpuBuf)
{
   struct sock *sk;
   struct socket *sock;
   int err = 0;
   CommPacket packet = {
      .opCode = PVTCP_OP_IO
   };
   unsigned long long timeout;

   sk = SkFromPvsk(pvsk);
   if (!sk) {
      /* IO processing is skipped on socket create-error sockets. */

      return -1;
   }
   if (!perCpuBuf) {
      /* No read buffer. */

      return -1;
   }

   sock = sk->sk_socket;
   packet.data64 = pvsk->peerSock;
   COMM_OPF_CLEAR_ERR(packet.flags);

   if (sk->sk_state == TCP_LISTEN) {
      /* Process stream listen 'input'. */

      packet.len = sizeof packet;
      packet.data16 = sk->sk_ack_backlog;
      timeout = COMM_MAX_TO;
      if (pvsk->peerSockSet) {
         CommSvc_Write(pvsk->channel, &packet, &timeout);
         CommOS_Debug(("%s: Listen sock [0x%p] 'ack_backlog' [%hu].\n",
                       __FUNCTION__, sk, packet.data16));
      }
   } else {
      /* Common path for both stream and datagram sockets. */

      int rc;
      int tmpSize;
      struct kvec vec[2];
      void *ioBuf = perCpuBuf;   /* Current read buffer; may become the large dgram buffer. */
      struct kvec *inVec;
      unsigned int inVecLen;
      unsigned int iovOffset = 0;
      unsigned int inputSize = 0;   /* Bytes forwarded during this call. */
      unsigned int coalescingSize = PVTCP_SOCK_RCVSIZE >> 2;
      struct sockaddr_in sin = { .sin_family = AF_INET };
      struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6 };
      struct msghdr msg = {
         .msg_controllen = 0,
         .msg_control = NULL,
         .msg_flags = MSG_DONTWAIT
      };
      int tmpFlags = msg.msg_flags;   /* Saved to undo the MSG_PEEK|MSG_TRUNC probe flags. */
      PvtcpDgramPseudoHeader dgramHeader;

      tmpSize = CommOS_ReadAtomic(&pvsk->rcvdSize);
      while ((tmpSize < PVTCP_SOCK_SAFE_RCVSIZE) && pvsk->peerSockSet) {
         /* Give back a large buffer left over from the previous iteration. */
         if (ioBuf != perCpuBuf) {
            LargeDgramBufPut(ioBuf);
            ioBuf = perCpuBuf;
         }
         vec[0].iov_base = (char *)ioBuf;

         if (sk->sk_type == SOCK_STREAM) {
            if (PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_RD)) {
               break;
            }

            msg.msg_name = NULL;
            msg.msg_namelen = 0;
            vec[0].iov_len = PVTCP_SOCK_STREAM_BUF_SIZE;
         } else { /* SOCK_DGRAM || SOCK_RAW */
            /* Have recvmsg fill in the sender address for the pseudo header. */
            if (sk->sk_family == AF_INET) {
               msg.msg_name = &sin;
               msg.msg_namelen = sizeof sin;
            } else {
               msg.msg_name = &sin6;
               msg.msg_namelen = sizeof sin6;
            }

            /*
             * Check if datagram larger than the per cpu buffer; if so,
             * allocate a large enough buffer. This should happen quite
             * rarely, as well-behaved applications don't rely on IP
             * fragmentation to accommodate large sizes.
             */

            /* One-byte peek with MSG_TRUNC: rc is used as the datagram size. */
            vec[0].iov_len = 1;
            msg.msg_flags |= (MSG_PEEK | MSG_TRUNC);
            rc = kernel_recvmsg(sock, &msg, vec, 1, 1, msg.msg_flags);
            if (rc < 0) {
               break;
            }
            msg.msg_flags = tmpFlags;
            if (rc > PVTCP_SOCK_DGRAM_BUF_SIZE) {
               /*
                * Track large datagram allocations, whether allocation succeeds
                * or not. No need for atomic overhead, approximating is OK.
                */

               pvtcpOffDgramAllocations++;
               ioBuf = LargeDgramBufGet(rc);
               if (!ioBuf) {
                  /*
                   * We reset it to the per-cpu buffer such that we can still
                   * consume the datagram in the next recvmsg, which will set
                   * MSG_TRUNC so we won't put it on the channel.
                   */

                  CommOS_Debug(("%s: Dropping datagram (alloc failure)!\n",
                                __FUNCTION__));
                  ioBuf = perCpuBuf;
                  vec[0].iov_len = PVTCP_SOCK_DGRAM_BUF_SIZE;
               } else {
                  vec[0].iov_len = rc;
               }
            } else {
               vec[0].iov_len = PVTCP_SOCK_DGRAM_BUF_SIZE;
            }
            vec[0].iov_base = (char *)ioBuf;
         }

         /* The actual (consuming) read. */
         rc = kernel_recvmsg(sock, &msg, vec, 1, vec[0].iov_len, msg.msg_flags);
         if (rc < 0) {
            break;
         }

         if ((rc == 0) && (sk->sk_type == SOCK_STREAM)) {
            /* Zero-byte stream read: mark the read side shut and report it. */
            PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_RD, 1);
            err = -ECONNRESET;
            break;
         }

         if (msg.msg_flags & MSG_TRUNC) {
            /* Truncated datagram: consumed from the socket but not forwarded. */
            continue;
         }

         inputSize += rc;
         tmpSize = CommOS_AddReturnAtomic(&pvsk->rcvdSize, rc);
         if (tmpSize >= PVTCP_SOCK_LARGE_ACK_WM) {
            COMM_OPF_SET_VAL(packet.flags, PVTCP_SOCK_LARGE_ACK_ORDER);
         } else {
            COMM_OPF_SET_VAL(packet.flags, 0);
         }

         if (sk->sk_type == SOCK_STREAM) {
            vec[0].iov_base = ioBuf;
            vec[0].iov_len = rc;
            inVecLen = 1;
            packet.len = sizeof packet + rc;
         } else { /* SOCK_DGRAM || SOCK_RAW */
            /* Datagrams are prefixed with a pseudo header: port in d0, address in d1 (and d2 for IPv6). */
            if (sk->sk_family == AF_INET) {
               dgramHeader.d0 = (unsigned long long)sin.sin_port;
               PvtcpResetLoopbackInet4(pvsk, &sin.sin_addr.s_addr);
               dgramHeader.d1 = (unsigned long long)sin.sin_addr.s_addr;
            } else { /* AF_INET6 */
               dgramHeader.d0 = (unsigned long long)sin6.sin6_port;
               PvtcpResetLoopbackInet6(pvsk, &sin6.sin6_addr);
               PvtcpI6AddrPack(&sin6.sin6_addr.s6_addr32[0],
                               &dgramHeader.d1, &dgramHeader.d2);
            }
            vec[0].iov_base = &dgramHeader;
            vec[0].iov_len = sizeof dgramHeader;
            vec[1].iov_base = ioBuf;
            vec[1].iov_len = rc;
            inVecLen = 2;
            packet.len = sizeof packet + sizeof dgramHeader + rc;
         }

         inVec = vec;
         timeout = COMM_MAX_TO;
         rc = CommSvc_WriteVec(pvsk->channel, &packet,
                               &inVec, &inVecLen, &timeout, &iovOffset);
         if (rc != packet.len) {
            CommOS_Log(("%s: BOOG -- WROTE INCOMPLETE PACKET [%u->%d]!\n",
                        __FUNCTION__, packet.len, rc));
            break;
         }

         /*
          * If the write failed, we could print a warning. But if this
          * happened, the comm channel went down.
          */
         if (inputSize >= coalescingSize) {
            PvtcpSchedSock(pvsk); /* We must schedule ourselves back in. */
            break;
         }
      }
      /* Release the large datagram buffer if the loop exited while holding it. */
      if (ioBuf != perCpuBuf) {
         LargeDgramBufPut(ioBuf);
      }
   }
   return err;
}
/**
 *  @file
 *
 *  @brief Server (offload) side Linux-specific functions and callbacks.
 */


#include "pvtcp.h"

#if defined(CONFIG_NET_NS)
#include <linux/nsproxy.h>
#include <linux/un.h>
#endif

#include <net/ipv6.h>
#include <linux/kobject.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/cred.h>


/* The PVSock address (127.238.0.1) in binary form, host byte order. */
#define PVTCP_PVSOCK_ADDR 0x7fee0001
/* Network part shared by all pvsock addresses (127.238.0.0). */
#define PVTCP_PVSOCK_NET  0x7fee0000
/* Low-order bits that may vary within the pvsock network. */
#define PVTCP_PVSOCK_MASK 0x000000ff

/* From mvpkm */
extern uid_t Mvpkm_vmwareUid;

/*
 * Credentials to back socket file pointer. Used in Android ICS network
 * data usage accounting to bill guest data to MVP.
 */
static struct cred _cred;
static struct file _file = {
   .f_cred = &_cred,
};

/* From pvtcp_off_io_linux.c */
extern CommOSAtomic PvtcpOutputAIOSection;
extern void PvtcpOffLargeDgramBufInit(void);

/* UDP destination ports [portRangeBase, portRangeBase + portRangeSize) get per-port checks in the netfilter hook. */
static const unsigned short portRangeBase = 7000;
static const unsigned int portRangeSize = 31;
static int hooksRegistered = 0;

static inline int PvtcpTestPortIndexBit(unsigned int addr,
                                        unsigned int portIdx);
/**
 * @note
 * Netfilter hooks:
 *
 * We decide to drop each packet based on the following criteria:
 * 1) Destination address is a pvsock address AND
 * 2) the sending socket is a user TCP/UDP socket not owned by root AND
 * 3) (NOT(uid == vmwareUid) OR
 *     (type == UDP AND NOT(port-in-pvsock-range)))
 */
- * @return NF_ACCEPT if the packet is allowed through, NF_DROP otherwise - */ -static inline unsigned int -PvsockNfHook(struct sk_buff *skb, int inet6) -{ - uid_t uid; - unsigned int port; - struct socket *sock; - unsigned int addr = inet6 ? - ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]) : - ntohl(ip_hdr(skb)->daddr); - - if (likely((addr ^ PVTCP_PVSOCK_NET) & ~PVTCP_PVSOCK_MASK)) { - /* Not a pvsock address. */ - return NF_ACCEPT; - } - - sock = skb->sk->sk_socket; - if (unlikely(!sock)) { - return NF_ACCEPT; - } - - /* - * Guest (kernel) sockets can send to other guest sockets, - * Root can send to whoever it wants, no checks. - */ - uid = (sock->file ? sock->file->f_cred->uid : 0); - if (uid == 0 || (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM)) { - return NF_ACCEPT; - } - - /* - * Only vmware can send to guest. - */ - if (likely(uid == Mvpkm_vmwareUid)) { - if (sock->type == SOCK_DGRAM) { - /* - * Deny sending to UDP port in pvsock range, if receiving socket was - * not created by the guest with this pvsock address. Drop all other - * UDP packets. - */ - port = ntohs(udp_hdr(skb)->dest) - portRangeBase; - if ((port < portRangeSize) && - PvtcpTestPortIndexBit(htonl(addr), port)) { - return NF_ACCEPT; - } - return NF_DROP; - } - /* - * TCP is all-good. - */ - return NF_ACCEPT; - } - - return NF_DROP; -} - - -/** - * @brief AF_INET4 Netfilter hook. Restricts LOCAL_OUT packets. - * See note above to filter policy. - * @param hooknum netfilter hook number - * @param skb skbuff - * @param in rx net_device - * @param out out net_device - * @param okfn ignored - * @return NF_ACCEPT if the packet is allowed through, NF_DROP otherwise - */ -static unsigned int -Inet4NfHook(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return PvsockNfHook(skb, 0); -} - -/** - * @brief AF_INET6 Netfilter hook. Restricts LOCAL_OUT packets. - * See note above to filter policy. 
- * @param hooknum netfilter hook number - * @param skb skbuff - * @param in rx net_device - * @param out out net_device - * @param okfn ignored - * @return NF_ACCEPT if the packet is allowed through, NF_DROP otherwise - */ -static unsigned int -Inet6NfHook(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - if (!ipv6_addr_v4mapped(&ipv6_hdr(skb)->daddr)) { - /* Not ipv4-mapped, so not a pvsock address. */ - return NF_ACCEPT; - } - - return PvsockNfHook(skb, 1); -} - - -static struct nf_hook_ops netfilterHooks[] = { - { - .hook = Inet4NfHook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_SECURITY - }, - { - .hook = Inet6NfHook, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_SECURITY - } -}; - - -#if !defined(CONFIG_SYSFS) -#error "The pvTCP offload module requires sysfs!" -#endif - -/* - * State kobject, attributes and type. - */ - -typedef struct PvtcpStateKObj { - struct kobject kobj; - CommTranspInitArgs transpArgs; - unsigned int pvsockAddr; - int useNS; - int haveNS; -} PvtcpStateKObj; - - -typedef struct PvtcpStateKObjAttr { - struct attribute attr; - ssize_t (*show)(PvtcpStateKObj *stateKObj, char *buf); - ssize_t (*store)(PvtcpStateKObj *stateKObj, const char *buf, size_t count); -} PvtcpStateKObjAttr; - - -/** - * @brief Releases state a kobject. - * @param kobj (embedded) state kobject. - */ - -static void -StateKObjRelease(struct kobject *kobj) -{ - kfree(container_of(kobj, PvtcpStateKObj, kobj)); -} - - -/** - * @brief Sysfs show function for all pvtcp attributes. - * @param kobj (embedded) state kobject. - * @param attr pvtcp attribute to show. - * @param buf output buffer. - * @return number of bytes written or negative error code. 
- */ - -static ssize_t -StateKObjShow(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - PvtcpStateKObjAttr *stateAttr = container_of(attr, PvtcpStateKObjAttr, attr); - PvtcpStateKObj *stateKObj = container_of(kobj, PvtcpStateKObj, kobj); - - if (stateAttr->show) { - return stateAttr->show(stateKObj, buf); - } - - return -EIO; -} - - -/** - * @brief Sysfs store function for all pvtcp attributes. - * @param kobj (embedded) state kobject. - * @param attr pvtcp attribute to show. - * @param buf input buffer. - * @param count input buffer length. - * @return number of bytes consumed or negative error code. - */ - -static ssize_t -StateKObjStore(struct kobject *kobj, - struct attribute *attr, - const char *buf, - size_t count) -{ - PvtcpStateKObjAttr *stateAttr = container_of(attr, PvtcpStateKObjAttr, attr); - PvtcpStateKObj *stateKObj = container_of(kobj, PvtcpStateKObj, kobj); - - if (stateAttr->store) { - return stateAttr->store(stateKObj, buf, count); - } - - return -EIO; -} - - -static struct sysfs_ops StateKObjSysfsOps = { - .show = StateKObjShow, - .store = StateKObjStore -}; - - -/** - * @brief Show function for the comm_info pvtcp attribute. - * @param stateKObj state kobject. - * @param buf output buffer. - * @return number of bytes written or negative error code. - */ - -static ssize_t -StateKObjCommInfoShow(PvtcpStateKObj *stateKObj, - char *buf) -{ - unsigned int typeHash; - - /* - * In the offload module, the transport arguments' type field has been - * assigned the matching index in the versions array at probe time. - * Recover and print out the type hash. - */ - - typeHash = CommTransp_GetType(pvtcpVersions[stateKObj->transpArgs.type]); - - return snprintf(buf, PAGE_SIZE, "ID=%u,%u\nCAPACITY=%u\nTYPE=0x%0x\n", - stateKObj->transpArgs.id.d32[0], - stateKObj->transpArgs.id.d32[1], - stateKObj->transpArgs.capacity, - typeHash); -} - - -/** - * @brief Show function for the pvsock_addr pvtcp attribute. - * @param stateKObj state kobject. 
- * @param buf output buffer. - * @return number of bytes written or negative error code. - */ - -static ssize_t -StateKObjPvsockAddrShow(PvtcpStateKObj *stateKObj, - char *buf) -{ - union { - unsigned int raw; - unsigned char bytes[4]; - } addr; - - addr.raw = stateKObj->pvsockAddr; - return snprintf(buf, PAGE_SIZE, "%u.%u.%u.%u\n", - (unsigned int)addr.bytes[0], (unsigned int)addr.bytes[1], - (unsigned int)addr.bytes[2], (unsigned int)addr.bytes[3]); -} - - -/** - * @brief Show function for the use_ns pvtcp attribute. - * @param stateKObj state kobject. - * @param buf output buffer. - * @return number of bytes written or negative error code. - */ - -static ssize_t -StateKObjUseNSShow(PvtcpStateKObj *stateKObj, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", stateKObj->useNS); -} - - -/** - * @brief Store function for the use_ns pvtcp attribute. - * @param stateKObj state kobject. - * @param buf input buffer. - * @param count input buffer length. - * @return number of bytes consumed or negative error code. 
- */ - -static ssize_t -StateKObjUseNSStore(PvtcpStateKObj *stateKObj, - const char *buf, - size_t count) -{ - int rc = -EINVAL; - - /* coverity[secure_coding] */ - if (stateKObj->haveNS && (sscanf(buf, "%d", &stateKObj->useNS) == 1)) { - stateKObj->useNS = !!stateKObj->useNS; - rc = count; - } - - return rc; -} - - -static PvtcpStateKObjAttr stateKObjCommInfoAttr = - __ATTR(comm_info, 0444, StateKObjCommInfoShow, NULL); - -static PvtcpStateKObjAttr stateKObjPvsockAddrAttr = - __ATTR(pvsock_addr, 0444, StateKObjPvsockAddrShow, NULL); - -static PvtcpStateKObjAttr stateKObjUseNSAttr = - __ATTR(use_ns, 0644, StateKObjUseNSShow, StateKObjUseNSStore); - - -static struct attribute *stateKObjDefaultAttrs[] = { - &stateKObjCommInfoAttr.attr, - &stateKObjPvsockAddrAttr.attr, - &stateKObjUseNSAttr.attr, - NULL -}; - - -static struct kobj_type stateKType = { - .sysfs_ops = &StateKObjSysfsOps, - .release = StateKObjRelease, - .default_attrs = stateKObjDefaultAttrs -}; - - -/* - * Initialization of module entry and exit callbacks. - */ - -static int Init(void *args); -static void Exit(void); - -COMM_OS_MOD_INIT(Init, Exit); - - -/* - * AIO socket read buffers, stats and other global state. - */ - -static CommOSMutex globalLock; -static char perCpuBuf[NR_CPUS][PVTCP_SOCK_BUF_SIZE]; - -#define PVTCP_OFF_MAX_LB_ADDRS 255 -static unsigned int loopbackAddrs[PVTCP_OFF_MAX_LB_ADDRS] = { - 0xffffffff, // Network address always on, all ports allowed. - 0x7fffffff // Host address not yet on, all ports allowed. - // All the rest zeroed out. 
-}; - -static const unsigned int loopbackReserved = 0x00000001 << 31; - - -#define PvtcpTestLoopbackBit(entry, mask) \ - ((entry) & (mask)) - -#define PvtcpSetLoopbackBit(entry, mask) \ - ((entry) |= (mask)) - -#define PvtcpResetLoopbackBit(entry, mask) \ - ((entry) &= ~(mask)) - - -static inline int -PvtcpTestPortIndexBit(unsigned int addr, - unsigned int portIdx) -{ - return PvtcpTestLoopbackBit(loopbackAddrs[*((unsigned char *)&addr + 3)], - BIT(portIdx)); -} - - -static inline void -PvtcpSetPortIndexBit(unsigned int addr, - unsigned int portIdx) -{ - PvtcpSetLoopbackBit(loopbackAddrs[*((unsigned char *)&addr + 3)], - BIT(portIdx)); -} - - -static inline void -PvtcpResetPortIndexBit(unsigned int addr, - unsigned int portIdx) -{ - PvtcpResetLoopbackBit(loopbackAddrs[*((unsigned char *)&addr + 3)], - BIT(portIdx)); -} - - -unsigned int pvtcpLoopbackOffAddr; - -unsigned long long pvtcpOffDgramAllocations = 0; - -/* - * Destructor shim addresses and function pointer - */ - -extern void asmDestructorShim(struct sock*); - - -/* - * Functions. - */ - -/** - * @brief Release a socket, NULLing out the fake file field to avoid confusing - * Linux on the release path - * @param sock socket to release - */ -static void -SockReleaseWrapper(struct socket *sock) -{ - sock->file = NULL; - sock_release(sock); -} - -/** - * @brief Gets a new loopback address in the 127.238.0.255 network. - * Note that the first address, 127.238.0.1, is always the host's. - * @return new address or -1U if none is available. 
- */ - -static unsigned int -GetLoopbackAddr(void) -{ - static unsigned char addrTempl[4] = { 127, 238, 0, 0 }; - unsigned int rc = -1U; - unsigned int idx; - struct socket *sock; - - CommOS_MutexLock(&globalLock); - for (idx = 1; idx < PVTCP_OFF_MAX_LB_ADDRS; idx++) { - if (!PvtcpTestLoopbackBit(loopbackAddrs[idx], loopbackReserved)) { - addrTempl[3] = (unsigned char)idx; - memcpy(&rc, addrTempl, sizeof rc); - - /* Create a dgram socket to configure/bring-up the lo:N interface. */ - - if (!sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock)) { - int err; - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr = { .s_addr = rc } - }; - struct ifreq ifr = { - .ifr_flags = IFF_UP - }; - - snprintf(ifr.ifr_name, sizeof ifr.ifr_name, "lo:%u", idx); - memcpy(&ifr.ifr_addr, &sin, sizeof ifr.ifr_addr); - err = kernel_sock_ioctl(sock, SIOCSIFADDR, (unsigned long)&ifr); - sock_release(sock); - if (err) { - CommOS_Log(("%s: Could not set loopback address (ioctl)!\n", - __FUNCTION__)); - rc = -1U; - continue; /* Try next address. */ - } else { - PvtcpSetLoopbackBit(loopbackAddrs[idx], loopbackReserved); - CommOS_Debug(("%s: Allocated loopback address [%u.%u.%u.%u].\n", - __FUNCTION__, - addrTempl[0], addrTempl[1], - addrTempl[2], addrTempl[3])); - break; - } - } else { - CommOS_Log(("%s: Could not set loopback address (create)!\n", - __FUNCTION__)); - rc = -1U; - break; - } - } - } - if (idx == PVTCP_OFF_MAX_LB_ADDRS) { - CommOS_Log(("%s: loopback address range exceeded!\n", __FUNCTION__)); - } - - CommOS_MutexUnlock(&globalLock); - return rc; -} - - -/** - * @brief Puts back a loopback address in the 127.238.0.255 network. - * @param uaddr address to put back. 
- */ - -static void -PutLoopbackAddr(unsigned int uaddr) -{ - const unsigned char addrTempl[3] = { 127, 238, 0 }; - unsigned char addr[4]; - unsigned int idx; - struct socket *sock; - - memcpy(addr, &uaddr, sizeof uaddr); - if (memcmp(addrTempl, addr, sizeof addrTempl)) { - return; - } - - idx = addr[3]; - if ((idx == 0) || (idx >= PVTCP_OFF_MAX_LB_ADDRS)) { - return; - } - - CommOS_MutexLock(&globalLock); - if (!PvtcpTestLoopbackBit(loopbackAddrs[idx], loopbackReserved)) { - CommOS_Debug(("%s: loopback entry [%u] already freed.\n", - __FUNCTION__, idx)); - goto out; - } - - if (!sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock)) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr = { .s_addr = uaddr } - }; - struct ifreq ifr = { - .ifr_flags = 0 - }; - - snprintf(ifr.ifr_name, sizeof ifr.ifr_name, "lo:%u", idx); - memcpy(&ifr.ifr_addr, &sin, sizeof ifr.ifr_addr); - kernel_sock_ioctl(sock, SIOCSIFFLAGS, (unsigned long)&ifr); - sock_release(sock); - loopbackAddrs[idx] = 0; // Zero everything out. - CommOS_Debug(("%s: Deallocated loopback address [%u.%u.%u.%u].\n", - __FUNCTION__, addr[0], addr[1], addr[2], addr[3])); - } else { - CommOS_Log(("%s: Could not delete loopback address!\n", - __FUNCTION__)); - } - -out: - CommOS_MutexUnlock(&globalLock); -} - - -/** - * @brief Retrieves and retains the namespace associated with a channel. - * A server must be listening for requests to retrieve the pid of the - * process owning the net namespace for the passed context/vm id. - * Communication takes place over a datagram socket in the AF_UNIX family, - * bound to "/usr/lib/vmware/pvtcp/config/serv_addr". - * @param state channel state for which to retrieve the network namespace. - * @sideeffect If an associated namespace is found, it is retained and saved - * in the state object. 
- */ - -static void -GetNetNamespace(PvtcpState *state) -{ -#if defined(CONFIG_NET_NS) && !defined(PVTCP_NET_NS_DISABLE) - CommTranspInitArgs args; - pid_t pidn; - struct pid *pid; - struct task_struct *tsk; - struct nsproxy *nsproxy; - struct net *ns; - struct socket *sock; - struct sockaddr_un addr = { - .sun_family = AF_UNIX - }; - struct timeval timeout = { - .tv_sec = 3000, - .tv_usec = 0 - }; - const int passcred = 1; - char buf[64]; - struct kvec vec; - const char *sockname = "pvtcp-vpn"; /* abstract namespace for AF_UNIX/LOCAL sockets */ - const size_t socknamelen = strlen(sockname); - - struct msghdr msg = { - .msg_name = (struct sockaddr *)&addr, - .msg_namelen = 1 + offsetof(struct sockaddr_un, sun_path) + socknamelen - }; - - - if (!state) { - return; - } - - args = CommSvc_GetTranspInitArgs(state->channel); - ns = NULL; - pidn = 0; - - if (sock_create_kern(AF_UNIX, SOCK_DGRAM, 0, &sock)) { - CommOS_Debug(("%s: Can't create config socket!\n", __FUNCTION__)); - goto out; - } - if (kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - (char *)&timeout, sizeof timeout)) { - sock_release(sock); - CommOS_Debug(("%s: Can't set timeout on config socket!\n", __FUNCTION__)); - goto out; - } - if (kernel_setsockopt(sock, SOL_SOCKET, SO_PASSCRED, - (char *)&passcred, sizeof passcred)) { - sock_release(sock); - CommOS_Debug(("%s: Can't set passcred on config socket!\n", - __FUNCTION__)); - goto out; - } - - /* - * Send the configuration request and receive the reply: - * - the request carries the VM/guest ID as used in the transport - * arguments used to create the channel. - * - the reply is expected to contain the pid of the namespace owner. 
- */ - - memset(buf, 0, sizeof buf); - snprintf(buf, sizeof buf, "%u\n", args.id.d32[0]); - buf[sizeof buf - 1] = '\0'; - vec.iov_base = buf; - vec.iov_len = strlen(buf); - - /* use anonymous name */ - addr.sun_path[0] = 0; - memcpy(addr.sun_path+1, sockname, socknamelen); - - if (kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len) <= 0) { - sock_release(sock); - CommOS_Debug(("%s: Could not send config request for vm [%u]!\n", - __FUNCTION__, args.id.d32[0])); - goto out; - } - - memset(buf, 0, sizeof buf); - vec.iov_base = buf; - vec.iov_len = sizeof buf; - if (kernel_recvmsg(sock, &msg, &vec, 1, vec.iov_len, 0) <= 0) { - CommOS_Debug(("%s: Could not receive config reply for vm [%u]!\n", - __FUNCTION__, args.id.d32[0])); - } else { - buf[sizeof buf - 1] = '\0'; - /* coverity[secure_coding] */ - sscanf(buf, "%d", &pidn); - } - sock_release(sock); - - if (!pidn) { - goto out; - } - - pid = find_get_pid(pidn); - if (pid) { - tsk = pid_task(pid, PIDTYPE_PID); - if (tsk) { - rcu_read_lock(); - nsproxy = task_nsproxy(tsk); - if (nsproxy && nsproxy->net_ns) { - ns = maybe_get_net(nsproxy->net_ns); - } - rcu_read_unlock(); - } - put_pid(pid); - } - -out: - if (!ns) { - CommOS_Debug(("%s: Not using a namespace for vm [%u].\n", - __FUNCTION__, args.id.d32[0])); - ns = &init_net; - } else { - CommOS_Debug(("%s: Found the net namespace for vm [%u].\n", - __FUNCTION__, args.id.d32[0])); - } -#else - void *ns = NULL; -#endif - - state->namespace = ns; -} - - -/** - * @brief Releases the network namespace associated with a channel state. - * @param namespace namespace to be released. - * @sideeffect If the namespace is not the initial one, it is released. - */ - -static void -PutNetNamespace(void *namespace) -{ -#if defined(CONFIG_NET_NS) && !defined(PVTCP_NET_NS_DISABLE) - if (namespace && (namespace != &init_net)) { - put_net((struct net *)namespace); - } -#endif -} - - -/** - * @brief Offload state constructor called when a channel is created. 
- * The function first calls the default state allocator; it then retrieves - * the n/w namespace associated with this client, retains it and stores it - * in the state object. Finally, it creates a sysfs node. - * @param[in,out] channel channel to initialize. - * @return pointer to a new state structure or NULL. - * @sideeffect Allocates memory. - */ - -static void * -StateAlloc(CommChannel channel) -{ - extern struct kset *Mvpkm_FindVMNamedKSet(int, const char *); - PvtcpState *state = NULL; - PvtcpIf *loopbackNetif = NULL; - PvtcpStateKObj *stateKObj = NULL; - struct kset *kset = NULL; - int rc; - CommTranspInitArgs transpArgs; - - transpArgs = CommSvc_GetTranspInitArgs(channel); - - /* - * The transport ID is assigned in an implementation-dependent way. - * (see lib/comm/comm_transp.h for transport type definitions.) - * However, the first 32 bits are expected to denote the guest/VM ID, - * while the last 32 bits are a resource handle within that VM. On MVP, - * transports map to queue pairs, which follow this convention. 
- */ - - kset = Mvpkm_FindVMNamedKSet((int)transpArgs.id.d32[0], "devices"); - if (!kset) { - CommOS_Debug(("%s: Could not find sysfs '.../vm/N/devices' kset!\n", - __FUNCTION__)); - goto error; - } - - state = PvtcpStateAlloc(channel); - if (!state) { - CommOS_Debug(("%s: Could not allocate state!\n", __FUNCTION__)); - goto error; - } - - /* coverity[leaked_storage] */ - stateKObj = kzalloc(sizeof *stateKObj, GFP_KERNEL); - if (!stateKObj) { - CommOS_Debug(("%s: Could not allocate state kobject!\n", __FUNCTION__)); - goto error; - } - - stateKObj->kobj.kset = kset; - /* coverity[leaked_storage] */ - rc = kobject_init_and_add(&stateKObj->kobj, &stateKType, NULL, "pvtcp"); - if (rc) { - CommOS_Debug(("%s: Could not add state kobject to parent kset [%d]!\n", - __FUNCTION__, rc)); - goto error; - } - - loopbackNetif = PvtcpStateFindIf(state, pvtcpIfLoopbackInet4); - BUG_ON(loopbackNetif == NULL); - loopbackNetif->conf.addr.in.s_addr = GetLoopbackAddr(); - if (loopbackNetif->conf.addr.in.s_addr == -1U) { - CommOS_Log(("%s: Could not allocate loopback address!\n", __FUNCTION__)); - goto error; - } - - GetNetNamespace(state); - - stateKObj->transpArgs = transpArgs; - stateKObj->pvsockAddr = loopbackNetif->conf.addr.in.s_addr; -#if defined(CONFIG_NET_NS) - stateKObj->haveNS = (state->namespace != &init_net); - stateKObj->useNS = stateKObj->haveNS; -#endif - state->extra = stateKObj; - - _cred.uid = _cred.gid = _cred.suid = _cred.sgid = - _cred.euid = _cred.egid = _cred.fsuid = _cred.fsgid = Mvpkm_vmwareUid; - - -out: - if (kset) { - kset_put(kset); - } - return state; - -error: - if (stateKObj) { - kobject_del(&stateKObj->kobj); - kobject_put(&stateKObj->kobj); - } - if (loopbackNetif && (loopbackNetif->conf.addr.in.s_addr != -1U)) { - PutLoopbackAddr(loopbackNetif->conf.addr.in.s_addr); - } - if (state) { - PvtcpStateFree(state); - state = NULL; - } - goto out; -} - - -/** - * @brief Offload state destructor called when a channel is closed. 
- * The function releases this client's n/w namespace and then calls the - * default state deallocator. - * @param arg pointer to state structure. - * @sideeffect Destroys all netifs and their sockets, deallocates memory. - */ - -static void -StateFree(void *arg) -{ - PvtcpState *state = arg; - PvtcpIf *loopbackNetif; - void *namespace; - - if (!state) { - return; - } - - if (state->extra) { - PvtcpStateKObj *stateKObj = state->extra; - - kobject_del(&stateKObj->kobj); - kobject_put(&stateKObj->kobj); - } - - namespace = state->namespace; - loopbackNetif = PvtcpStateFindIf(state, pvtcpIfLoopbackInet4); - BUG_ON(loopbackNetif == NULL); - PutLoopbackAddr(loopbackNetif->conf.addr.in.s_addr); - PvtcpStateFree(state); - PutNetNamespace(namespace); -} - - -/** - * @brief Releases socket. This function is called when the channel state - * owning the socket is closed. - * @param[in,out] pvsk PV socket to release. - * @sideeffect the socket eventually gets deallocated. - */ - -void -PvtcpReleaseSocket(PvtcpSock *pvsk) -{ - struct socket *sock = SkFromPvsk(pvsk)->sk_socket; - - SOCK_IN_LOCK(pvsk); - SOCK_OUT_LOCK(pvsk); - pvsk->peerSockSet = 0; - SockReleaseWrapper(sock); - SOCK_OUT_UNLOCK(pvsk); - SOCK_IN_UNLOCK(pvsk); - CommOS_Debug(("%s: [0x%p].\n", __FUNCTION__, pvsk)); -} - - -/** - * @brief Tests if the passed address is 127.238.0.1 or 127.0.0.1. - * @param pvsk socket to test. - * @param addr inet4 address to test. - * @return > 1: morph and propagate new address to caller, 1: just morph, - * 0: don't morph, < 0 (-EADDRNOTAVAIL): bad loopback. - */ - -static inline int -TestLoopbackInet4(PvtcpSock *pvsk, - unsigned int addr) -{ - if (!ipv4_is_loopback(addr)) { - return 0; - } - - if (addr != htonl(PVTCP_PVSOCK_ADDR)) { - if (addr != htonl(INADDR_LOOPBACK)) { - return -EADDRNOTAVAIL; - } - if (PvtcpHasSockNamespace(pvsk)) { - /* We don't morph normal 127.0.0.1 when NS present. 
*/ - - return 0; - } - return 2; - } - - return 1; -} - - -/** - * @brief Tests if the passed address is 127.238.0.1 or 127.0.0.1 and the - * socket has a namespace. If yes, the address will be morphed into - * the actual loopback address, then a bind() is performed. - * Note that the function returns EADDRNOTAVAIL for any other loopbacks. - * @param pvsk socket to test. - * @param[in,out] addr inet4 address to test. - * @param port port to bind, or zero for any port. - * @return 1 if bind should be performed by caller, bind return code otherwise. - */ - -int -PvtcpTestAndBindLoopbackInet4(PvtcpSock *pvsk, - unsigned int *addr, - unsigned short port) -{ - int rc; - struct sockaddr_in sin; - unsigned int morphedAddr; - int propagate = 0; - - rc = TestLoopbackInet4(pvsk, *addr); - switch (rc) { - case 2: - propagate = 1; // Fall through. - case 1: - break; // Proceed with morphing. - case 0: - return 1; // Don't morph, let bind() be done by caller. - default: - return rc; - } - - if (pvsk->netif->conf.family == PVTCP_PF_LOOPBACK_INET4) { - /* The socket has already been morphed/bound. */ - - morphedAddr = pvsk->netif->conf.addr.in.s_addr; - rc = 0; - goto out; - } - - /* - * Move the socket to the initial namespace before binding it - * such that the loopback address is accessible to the host. - */ - - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_INITIAL); - PvtcpStateAddSocket(pvsk->channel, pvtcpIfLoopbackInet4, pvsk); - morphedAddr = pvsk->netif->conf.addr.in.s_addr; - memset(&sin, 0, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_port = port; - sin.sin_addr.s_addr = morphedAddr; - - /* Bind to the channel loopback address. */ - - rc = kernel_bind(SkFromPvsk(pvsk)->sk_socket, - (struct sockaddr *)&sin, sizeof sin); - if (rc) { - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_CHANNEL); - PvtcpStateAddSocket(pvsk->channel, pvtcpIfUnbound, pvsk); - } else { - /* - * Bind succeeded on pvsock address. - * If this is a pvsock UDP reserved port, record it. 
- */ - - port = ntohs(port) - portRangeBase; - if ((SkFromPvsk(pvsk)->sk_socket->type == SOCK_DGRAM) && - (port < portRangeSize)) { - CommOS_MutexLock(&globalLock); - PvtcpSetPortIndexBit(pvsk->netif->conf.addr.in.s_addr, port); - CommOS_MutexUnlock(&globalLock); - } - - /* - * pvsock data usage shouldn't be counted as MVP external traffic. - */ - SkFromPvsk(pvsk)->sk_socket->file = NULL; - } - -out: - if (propagate) { - *addr = morphedAddr; - } - return rc; -} - - -/** - * @brief Tests if the passed address is IPV4-mapped 127.238.0.1 or 127.0.0.1, - * clean ::1, and whether the socket has a namespace. - * If needed, the address will be morphed into the actual loopback address, - * then a bind() is performed. - * Note that the function returns EADDRNOTAVAIL for any other loopbacks. - * @param pvsk socket to test. - * @param[in,out] addr0 first 64 bits of inet6 address to test. - * @param[in,out] addr1 last 64 bits of inet6 address to test. - * @param port port to bind, or zero for any port. - * @return 1 if bind should be performed by caller, bind return code otherwise. - */ - -int -PvtcpTestAndBindLoopbackInet6(PvtcpSock *pvsk, - unsigned long long *addr0, - unsigned long long *addr1, - unsigned short port) -{ - int rc; - struct sockaddr_in6 sin6; - union { - unsigned long long halves[2]; - struct in6_addr in6; - } in6Addr = { - .halves = { *addr0, *addr1 } - }; - int propagate = 0; - const int ipv6Only = 0; - - if (ipv6_addr_loopback(&in6Addr.in6)) { - if (PvtcpHasSockNamespace(pvsk)) { - return 1; - } - - /* Remember that we were passed '::1'. */ - - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_IPV6_LOOP, 1); - ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), &in6Addr.in6); - } - - if (!ipv6_addr_v4mapped(&in6Addr.in6)) { - /* If the address is not ipv4-mapped, stop testing. */ - - return 1; - } - - rc = TestLoopbackInet4(pvsk, in6Addr.in6.s6_addr32[3]); - switch (rc) { - case 2: - propagate = 1; // Fall through. - case 1: - break; // Proceed with morphing. 
- case 0: - return 1; // Don't morph, let bind() be done by caller. - default: - return rc; - } - - if (pvsk->netif->conf.family == PVTCP_PF_LOOPBACK_INET4) { - /* The socket has already been morphed/bound. */ - - ipv6_addr_set_v4mapped(pvsk->netif->conf.addr.in.s_addr, &in6Addr.in6); - rc = 0; - goto out; - } - - /* - * Move the socket to the initial namespace before binding it - * such that the loopback address is accessible to the host. - */ - - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_INITIAL); - PvtcpStateAddSocket(pvsk->channel, pvtcpIfLoopbackInet4, pvsk); - ipv6_addr_set_v4mapped(pvsk->netif->conf.addr.in.s_addr, &in6Addr.in6); - memset(&sin6, 0, sizeof sin6); - sin6.sin6_family = AF_INET6; - sin6.sin6_port = port; - sin6.sin6_addr = in6Addr.in6; - - /* - * Ensure we can use ipv4 mapped addresses and bind to the channel - * loopback address. - */ - - (void)kernel_setsockopt(SkFromPvsk(pvsk)->sk_socket, IPPROTO_IPV6, - IPV6_V6ONLY, (char *)&ipv6Only, sizeof ipv6Only); - rc = kernel_bind(SkFromPvsk(pvsk)->sk_socket, - (struct sockaddr *)&sin6, sizeof sin6); - if (rc) { - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_CHANNEL); - PvtcpStateAddSocket(pvsk->channel, pvtcpIfUnbound, pvsk); - } else { - /* - * Bind succeeded on pvsock address. - * If this is a pvsock UDP reserved port, record it. - */ - - port = ntohs(port) - portRangeBase; - if ((SkFromPvsk(pvsk)->sk_socket->type == SOCK_DGRAM) && - (port < portRangeSize)) { - CommOS_MutexLock(&globalLock); - PvtcpSetPortIndexBit(pvsk->netif->conf.addr.in.s_addr, port); - CommOS_MutexUnlock(&globalLock); - } - - /* - * pvsock data usage shouldn't be counted as MVP external traffic. - */ - SkFromPvsk(pvsk)->sk_socket->file = NULL; - } - -out: - if (propagate) { - *addr0 = in6Addr.halves[0]; - *addr1 = in6Addr.halves[1]; - } - return rc; -} - - -/** - * @brief Resets a 127.238.0.N address to 127.0.0.1. - * @param pvsk socket whose address needs resetting. - * @param[in,out] addr inet4 address to reset. 
- */ - -void -PvtcpResetLoopbackInet4(PvtcpSock *pvsk, - unsigned int *addr) -{ - if (!PvtcpHasSockNamespace(pvsk)) { - static const unsigned int pvsockAddr = htonl(PVTCP_PVSOCK_ADDR); - - if (!memcmp(&pvsockAddr, addr, 3) && memcmp(&pvsockAddr, addr, 4)) { - /* If it's a pvsock address but _not_ the host's, overwrite it. */ - - *addr = htonl(INADDR_LOOPBACK); - } - } -} - - -/** - * @brief Resets an IPV4-mapped ::ffff:127.238.0.N IPV6 address to loopback. - * @param pvsk socket whose address needs resetting. - * @param[in,out] in6 inet6 address to reset. - */ - -void -PvtcpResetLoopbackInet6(PvtcpSock *pvsk, - struct in6_addr *in6) -{ - if (!PvtcpHasSockNamespace(pvsk) && ipv6_addr_v4mapped(in6)) { - if (PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_IPV6_LOOP)) { - /* If the original address came in as ::1, we reset as such. */ - - static const struct in6_addr in6Loopback = IN6ADDR_LOOPBACK_INIT; - - *in6 = in6Loopback; - } else { - PvtcpResetLoopbackInet4(pvsk, &in6->s6_addr32[3]); - } - } -} - - -/** - * @brief Called at module load time. It registers with the Comm runtime. 
- * @param args initialization arguments - * @return zero if successful, -1 otherwise - * @sideeffect Leaves the module loaded - */ - -static int -Init(void *args) -{ - int rc = -1; - -#if !defined(PVTCP_DISABLE_NETFILTER) - rc = nf_register_hooks(netfilterHooks, ARRAY_SIZE(netfilterHooks)); - if (rc) { - CommOS_Log(("%s: Could not register netfilter hooks!\n", __FUNCTION__)); - goto out; - } else { - CommOS_Debug(("%s: Registered netfilter hooks.\n", __FUNCTION__)); - } - hooksRegistered = 1; -#else - CommOS_Log(("%s: Netfilter hooks disabled.\n", __FUNCTION__)); -#endif - - CommOS_MutexInit(&globalLock); - CommOS_WriteAtomic(&PvtcpOutputAIOSection, 0); - PvtcpOffLargeDgramBufInit(); - - pvtcpImpl.owner = CommOS_ModuleSelf(); - pvtcpImpl.stateCtor = StateAlloc; - pvtcpImpl.stateDtor = StateFree; - if (CommSvc_RegisterImpl(&pvtcpImpl) == 0) { - rc = 0; - pvtcpLoopbackOffAddr = GetLoopbackAddr(); - if (pvtcpLoopbackOffAddr == -1U) { - CommOS_Log(("%s: Could not allocate offload loopback address!\n", - __FUNCTION__)); - rc = -1; - CommSvc_UnregisterImpl(&pvtcpImpl); - } - } - -out: - if (rc) { - if (hooksRegistered) { - nf_unregister_hooks(netfilterHooks, ARRAY_SIZE(netfilterHooks)); - } - } - return rc; -} - - -/** - * @brief Called at module unload time. It shuts down pvtcp. - * @sideeffect Total and utter destruction. - */ - -static void -Exit(void) -{ - PutLoopbackAddr(pvtcpLoopbackOffAddr); - CommSvc_UnregisterImpl(&pvtcpImpl); -#if !defined(PVTCP_DISABLE_NETFILTER) - if (hooksRegistered) { - nf_unregister_hooks(netfilterHooks, ARRAY_SIZE(netfilterHooks)); - CommOS_Debug(("%s: Netfilter hooks unregistered.\n", __FUNCTION__)); - } -#endif - CommOS_Log(("%s: Allocations of large datagrams: %llu.\n", - __FUNCTION__, pvtcpOffDgramAllocations)); -} - - -/* - * Socket callback interceptors. - */ - -/** - * @brief Callback called when socket is destroyed. 
- * @param[in,out] sk socket to cleanup - * @return 0 if socket memory is freed, < 0 otherwise (no-op) - * @sideeffect Send queue buffers are deallocated - */ - -int -DestructCB(struct sock *sk) -{ - PvtcpOffBuf *internalBuf; - PvtcpOffBuf *tmp; - PvtcpSock *pvsk = PvskFromSk(sk); - - if (!pvsk || - (SkFromPvsk(pvsk) != sk) || - (pvsk->destruct == asmDestructorShim)) { - /* Module put _not_ to be performed by asmDestructorShim. */ - - CommOS_Debug(("%s: pvsk / sk inconsistency. Ignored.\n", __FUNCTION__)); - return -1; - } - - CommOS_ListForEachSafe(&pvsk->queue, internalBuf, tmp, link) { - CommOS_ListDel(&internalBuf->link); - PvtcpBufFree(PvtcpOffBufFromInternal(internalBuf)); - } - if (pvsk->destruct) { - pvsk->destruct(sk); - } - - if (pvsk->rpcReply) { - CommOS_Kfree(pvsk->rpcReply); - } - CommOS_Kfree(pvsk); - - /* - * Module put is performed by asmDestructorShim. - */ - - return 0; -} - - -/** - * @brief Callback called when socket state changes occur. - * @param sk socket specified socket which changed state - * @sideeffect A writer task may be scheduled - */ - -static void -StateChangeCB(struct sock *sk) -{ - PvtcpSock *pvsk = PvskFromSk(sk); - - if (!pvsk || - (SkFromPvsk(pvsk) != sk) || - (pvsk->stateChange == StateChangeCB)) { - CommOS_Debug(("%s: pvsk / sk inconsistency. Ignored.\n", __FUNCTION__)); - return; - } - - /* - * The socket (spin) lock is held when this function is called. - */ - - CommOS_Debug(("%s: [0x%p] sk_state [%u] sk_err [%d] sk_err_soft [%d].\n", - __FUNCTION__, pvsk, sk->sk_state, - sk->sk_err, sk->sk_err_soft)); - if (pvsk->stateChange) { - pvsk->stateChange(sk); - } - if (sk->sk_state == TCP_ESTABLISHED) { - PvskSetOpFlag(pvsk, PVTCP_OP_CONNECT); - } - PvtcpSchedSock(pvsk); -} - - -/** - * @brief Callback called when an error is set on the socket. 
- * @param sk socket the error happened on - * @sideeffect A writer task may be scheduled - */ - -static void -ErrorReportCB(struct sock *sk) -{ - PvtcpSock *pvsk = PvskFromSk(sk); - - if (!pvsk || - (SkFromPvsk(pvsk) != sk) || - (pvsk->errorReport == ErrorReportCB)) { - CommOS_Debug(("%s: pvsk / sk inconsistency. Ignored\n", __FUNCTION__)); - return; - } - - /* - * The socket (spin) lock is held when this function is called. - * Interesting sk_err-s: - * ECONNRESET - tcp_disconnect(), tcp_reset() - * ECONNREFUSED - tcp_reset() - * EPIPE - tcp_reset() - * ETIMEDOUT - tcp_write_error() - * EHOSTUNREACH, etc. - tcp_v4_error()??, icmp errors - * etc. - __udp4_lib_err(), icmp errors - */ - - CommOS_Debug(("%s: [0x%p] sk_err [%d] sk_err_soft [%d].\n", - __FUNCTION__, pvsk, sk->sk_err, sk->sk_err_soft)); - if (pvsk->errorReport) { - pvsk->errorReport(sk); - } - pvsk->err = sk->sk_err; - PvtcpSchedSock(pvsk); -} - - -/** - * @brief Callback called when data is available to be read from a socket. - * @param sk socket in question - * @param bytes number of bytes to read - * @sideeffect A writer task is scheduled _iff_ the peer can safely - * receive. - */ - -static void -DataReadyCB(struct sock *sk, - int bytes) -{ - PvtcpSock *pvsk = PvskFromSk(sk); - - if (!pvsk || - (SkFromPvsk(pvsk) != sk) || - (pvsk->dataReady == DataReadyCB)) { - CommOS_Debug(("%s: pvsk / sk inconsistency. Ignored.\n", __FUNCTION__)); - return; - } - - /* - * The socket (spin) lock is held when this function is called. - */ - - if (pvsk->dataReady) { - pvsk->dataReady(sk, bytes); - } - if (sk->sk_state == TCP_LISTEN) { - CommOS_Debug(("%s: Listen socket ready to accept [0x%p].\n", - __FUNCTION__, pvsk)); - } - PvtcpSchedSock(pvsk); -} - - -/** - * @brief Callback called when writing is possible on a socket. - * @param sk socket in question - * @sideeffect An AIO thread is scheduled. 
- */ - -static void -WriteSpaceCB(struct sock *sk) -{ - PvtcpSock *pvsk = PvskFromSk(sk); - - if (!pvsk || - (SkFromPvsk(pvsk) != sk) || - (pvsk->writeSpace == WriteSpaceCB)) { - CommOS_Debug(("%s: pvsk / sk inconsistency. Ignored.\n", __FUNCTION__)); - return; - } - - /* - * The socket (spin) lock is held when this function is called. - */ - - if (pvsk->writeSpace) { - pvsk->writeSpace(sk); - } - PvtcpSchedSock(pvsk); -} - - -/** - * @brief Initializes a newly created socket for offload operations. - * @param[in,out] sock socket to initialize - * @param channel channel to update - * @param peerSock peer PV socket of this socket - * @param parentPvsk parent of this socket or NULL - * @return zero on success, error code otherwise - */ - -static int -SockAllocInit(struct socket *sock, - CommChannel channel, - unsigned long long peerSock, - PvtcpSock *parentPvsk) -{ - struct sock *sk; - PvtcpSock *pvsk; - int sndBuf = PVTCP_SOCK_RCVSIZE * 4; - - if (!sock || !channel || !peerSock) { - return -EINVAL; - } - - sk = sock->sk; - sk->sk_user_data = NULL; - - pvsk = CommOS_Kmalloc(sizeof *pvsk); - if (!pvsk) { - return -ENOMEM; - } - - if (PvtcpOffSockInit(pvsk, channel)) { - CommOS_Kfree(pvsk); - return -ENOMEM; - } - - /* - * PVTCP sockets should be billed against the vmware uid. - */ - sk->sk_socket->file = &_file; - - /* Set peer (pv) socket. */ - pvsk->peerSock = peerSock; - pvsk->peerSockSet = 1; - - /* Set up back pointer. */ - pvsk->sk = sk; - - /* Keep track of new socket. 
*/ - if (PvtcpStateAddSocket(channel, pvtcpIfUnbound, pvsk) != 0) { - CommOS_Kfree(pvsk); - return -ENOMEM; - } - - /* - * Keep pvtcp around for at least the lifetime of this socket - */ - CommOS_ModuleGet(pvtcpImpl.owner); - - if (!parentPvsk) { - pvsk->destruct = sk->sk_destruct; - sk->sk_destruct = asmDestructorShim; - pvsk->stateChange = sk->sk_state_change; - sk->sk_state_change = StateChangeCB; - pvsk->errorReport = sk->sk_error_report; - sk->sk_error_report = ErrorReportCB; - pvsk->dataReady = sk->sk_data_ready; - sk->sk_data_ready = DataReadyCB; - pvsk->writeSpace = sk->sk_write_space; - sk->sk_write_space = WriteSpaceCB; - } else { - /* - * Copy the parent's saved callbacks. The parent pvsk is only passed - * when creating/initializing a socket after an 'accept'. - */ - - pvsk->destruct = parentPvsk->destruct; - sk->sk_destruct = asmDestructorShim; - pvsk->stateChange = parentPvsk->stateChange; - sk->sk_state_change = StateChangeCB; - pvsk->errorReport = parentPvsk->errorReport; - sk->sk_error_report = ErrorReportCB; - pvsk->dataReady = parentPvsk->dataReady; - sk->sk_data_ready = DataReadyCB; - pvsk->writeSpace = parentPvsk->writeSpace; - sk->sk_write_space = WriteSpaceCB; - - if (parentPvsk->netif->conf.family == PVTCP_PF_LOOPBACK_INET4) { - /* The parent socket was morphed/bound. */ - - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_INITIAL); - PvtcpStateAddSocket(pvsk->channel, pvtcpIfLoopbackInet4, pvsk); - } - } - - /* Install forward socket reference. */ - sk->sk_user_data = pvsk; - - /* - * Force the send buffer size high enough, such that we don't lose the - * just-a-bit-over-the-limit bytes. This is mainly needed for datagrams. - * Note that we always apply flow control between host and guest modules, - * according to the sizing model; so this is not artificially inflated. 
- */ - - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUFFORCE, - (void *)&sndBuf, sizeof sndBuf); - - return 0; -} - - -/** - * @brief Allocates a pvsk socket for error reporting (create operation). - * @param err error code to report to PV side - * @param channel channel error socket belongs to - * @param peerSock peer PV socket of this socket - * @return error socket on success, NULL otherwise - */ - -static PvtcpSock * -SockAllocErrInit(int err, - CommChannel channel, - unsigned long long peerSock) -{ - PvtcpSock *pvsk; - - if (!channel || !peerSock) { - return NULL; - } - - pvsk = CommOS_Kmalloc(sizeof *pvsk); - if (!pvsk) { - return NULL; - } - - if (PvtcpOffSockInit(pvsk, channel)) { - CommOS_Kfree(pvsk); - return NULL; - } - - /* Set peer (pv) socket and error. */ - pvsk->peerSock = peerSock; - pvsk->peerSockSet = 1; - pvsk->err = err; - - /* Set up back pointer to NULL such that PvtcpPutSock deallocates it. */ - pvsk->sk = NULL; - return pvsk; -} - - -/* - * Offload operations. - */ - -/** - * @brief Creates an offload socket and schedules it for reply. - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back. 
- */ - -void -PvtcpCreateOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - int rc; - struct socket *sock; - PvtcpSock *pvsk; - PvtcpState *state = (PvtcpState *)upperLayerState; - const int enable = 1; - - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - -#if defined(PVTCP_IPV6_DISABLE) - if (packet->data16 == AF_INET6) { - CommOS_Debug(("%s: AF_INET6 support is disabled.\n", __FUNCTION__)); - rc = -EAFNOSUPPORT; - } else -#endif - { - rc = sock_create_kern(packet->data16, packet->data32, - packet->data32ex, &sock); - } - - if (!rc) { - rc = SockAllocInit(sock, channel, packet->data64, NULL); - if (rc) { - SockReleaseWrapper(sock); - goto fail; - } - kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (void *)&enable, sizeof enable); - pvsk = PvskFromSk(sock->sk); - if (state->extra && - ((PvtcpStateKObj *)(state->extra))->useNS) { - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_CHANNEL); - } else { - PvtcpSwitchSock(pvsk, PVTCP_SOCK_NAMESPACE_INITIAL); - } - PvtcpStateAddSocket(pvsk->channel, pvtcpIfUnbound, pvsk); - PvskSetOpFlag(pvsk, PVTCP_OP_CREATE); - } else { - CommOS_Debug(("%s: Error creating offload socket: %d\n", - __FUNCTION__, rc)); - /* - * Pass -rc so we follow error conventions for other reply ops. - * The error code is fixed by the PV side so error codes are properly - * reported. - */ - pvsk = SockAllocErrInit(-rc, channel, packet->data64); - if (!pvsk) { - goto fail; - } - } - - PvtcpSchedSock(pvsk); - return; - -fail: - CommOS_Log(("%s: BOOG ** FAILED TO CREATE OFFLOAD SOCKET [%d] " - "_AND_ ERROR REPORTING SOCKET!\n" - " PV SIDE MAY BE LOCKED UP UNTIL CREATE RPC TIMES OUT!", - __FUNCTION__, rc)); -} - - -/** - * @brief Schedules an offload socket to be removed. 
- * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back and - * then release the socket. - */ - -void -PvtcpReleaseOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - - /* - * Check if this is a pvsock datagram socket bound on a reserved port. - * If so, reset the bit such that filtering drops rogue packets. - */ - - if ((sk->sk_socket->type == SOCK_DGRAM) && - (pvsk->netif->conf.family == PVTCP_PF_LOOPBACK_INET4)) { - unsigned short port = 0; - - if (sk->sk_family == AF_INET) { - struct sockaddr_in sin = { .sin_family = AF_INET }; - int addrLen = sizeof sin; - - if(!kernel_getsockname(sk->sk_socket, - (struct sockaddr *)&sin, &addrLen)) { - port = sin.sin_port; - } - } else { /* AF_INET6 */ - struct sockaddr_in6 sin = { .sin6_family = AF_INET6 }; - int addrLen = sizeof sin; - - if(!kernel_getsockname(sk->sk_socket, - (struct sockaddr *)&sin, &addrLen)) { - port = sin.sin6_port; - } - } - - port = ntohs(port) - portRangeBase; - if (port < portRangeSize) { - CommOS_MutexLock(&globalLock); - PvtcpResetPortIndexBit(pvsk->netif->conf.addr.in.s_addr, port); - CommOS_MutexUnlock(&globalLock); - } - } - - /* - * - hold the socket before setting the 'release' flag and until after - * the call to PvtcpSchedSock(): if the socket had already been scheduled - * ReleaseAIO may run, find the flag set and release this socket while - * it's being unlocked here. - * - * - hold the dispatch lock until done to ensure that subsequent Ops for - * this socket see peerSockSet == 0. 
- */ - - PvtcpHoldSock(pvsk); - SOCK_STATE_LOCK(pvsk); - pvsk->peerSockSet = 0; - SOCK_STATE_UNLOCK(pvsk); - PvskSetOpFlag(pvsk, PVTCP_OP_RELEASE); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); - PVTCP_UNLOCK_DISP_DISCARD_VEC(); -} - - -/** - * @brief Binds an offload socket to a given address - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back - */ - -void -PvtcpBindOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - struct sockaddr *addr; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - int reuseAddr; - int addrLen; - int rc; - - PvtcpHoldSock(pvsk); - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - - /* - * The socket-level option SO_REUSEADDR is set in the common socket code, - * meaning that we cannot intercept it in the guest pvtcp implementation. - * In order to respect the setting, the guest would pass the current - * setting in 'bind' requests. - * If the guest requires 'reuse address' setting, the value is incremented - * such that we differentiate between: 0) not requested, 1) 'false' and - * 2) 'true'. - */ - - reuseAddr = COMM_OPF_GET_VAL(packet->flags); - if ((reuseAddr == 1) || (reuseAddr == 2)) { - /* Explicit request, so decrement the value. 
*/ - - reuseAddr--; - kernel_setsockopt(sk->sk_socket, SOL_SOCKET, SO_REUSEADDR, - (void *)&reuseAddr, sizeof reuseAddr); - } - - if (sk->sk_family == AF_INET) { - memset(&sin, 0, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_port = packet->data16; - sin.sin_addr.s_addr = (unsigned int)packet->data64ex; - addr = (struct sockaddr *)&sin; - addrLen = sizeof sin; - - rc = PvtcpTestAndBindLoopbackInet4(pvsk, &sin.sin_addr.s_addr, - sin.sin_port); - if (rc <= 0) { - /* Bind has already happened. */ - - pvsk->err = -rc; - goto out; - } - } else { /* AF_INET6 */ - memset(&sin6, 0, sizeof sin6); - sin6.sin6_family = AF_INET6; - sin6.sin6_port = packet->data16; - addr = (struct sockaddr *)&sin6; - addrLen = sizeof sin6; - - rc = PvtcpTestAndBindLoopbackInet6(pvsk, &packet->data64ex, - &packet->data64ex2, sin6.sin6_port); - if (rc <= 0) { - /* Bind has already happened. */ - - pvsk->err = -rc; - goto out; - } - PvtcpI6AddrUnpack(&sin6.sin6_addr.s6_addr32[0], - packet->data64ex, packet->data64ex2); - } - - /* coverity[check_return] */ - pvsk->err = -kernel_bind(sk->sk_socket, addr, addrLen); - -out: - PvskSetOpFlag(pvsk, PVTCP_OP_BIND); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Sets a socket option. 
- * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back - */ -void -PvtcpSetSockOptOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - unsigned int optlen = packet->len - sizeof *packet; - - PvtcpHoldSock(pvsk); - - if ((vecLen != 1) || (vec[0].iov_len != optlen) || (optlen < sizeof(int))) { - pvsk->rpcStatus = -EINVAL; - goto out; - } - - if (packet->data32 == SOL_TCP) { - /* - * The back-end implementation must always run in 'nodelay' mode. - * Consequently, we ignore, but we cache the TCP_NODELAY and TCP_CORK - * settings such that getsockopt() can return them as they were 'set'. - * Applications use these settings for performance; pvtcp does quite - * well if it's not interfered with. - */ - - int on; - - switch (packet->data32ex) { - case TCP_NODELAY: - memcpy(&on, vec[0].iov_base, sizeof on); - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_TCP_NODELAY, on); - pvsk->rpcStatus = 0; - goto out; - case TCP_CORK: - memcpy(&on, vec[0].iov_base, sizeof on); - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_TCP_CORK, on); - pvsk->rpcStatus = 0; - goto out; - } - } - - pvsk->rpcStatus = kernel_setsockopt(sock, - packet->data32, - packet->data32ex, - vec[0].iov_base, - optlen); - -out: - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - PvskSetOpFlag(pvsk, PVTCP_OP_SETSOCKOPT); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Retrieves a socket option. 
- * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back - */ -void -PvtcpGetSockOptOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - unsigned int optLen = (unsigned int)(packet->data64ex); - char *optBuf; - int rc = 0; - - PvtcpHoldSock(pvsk); - - if ((optLen < sizeof(int)) || (optLen > PVTCP_SOCK_SAFE_RCVSIZE)) { - pvsk->rpcStatus = -EINVAL; - goto out; - } - - optBuf = CommOS_Kmalloc(optLen); - if (!optBuf) { - pvsk->rpcStatus = -EINVAL; - goto out; - } - - if (packet->data32 == SOL_TCP) { - /* - * See comment in PvtcpSetSockOptOp() regarding special treatment for - * the TCP_NODELAY and TCP_CORK settings. - */ - - int on; - - switch (packet->data32ex) { - case TCP_NODELAY: - on = PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_TCP_NODELAY); - optLen = sizeof on; - memcpy(optBuf, &on, optLen); - goto done; - case TCP_CORK: - on = PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_TCP_CORK); - optLen = sizeof on; - memcpy(optBuf, &on, optLen); - goto done; - } - } - - rc = kernel_getsockopt(sock, packet->data32, - packet->data32ex, optBuf, &optLen); - -done: - if (!rc) { - pvsk->rpcReply = optBuf; - CommOS_MemBarrier(); - pvsk->rpcStatus = (int)optLen; - } else { - CommOS_Kfree(optBuf); - pvsk->rpcStatus = rc; - } - -out: - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - PvskSetOpFlag(pvsk, PVTCP_OP_GETSOCKOPT); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Performs ioctl on offload socket. 
- * @param channel communication channel with offloader - * @param state state associated with this channel - * @param packet packet header received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - */ - -void -PvtcpIoctlOp(CommChannel channel, - void *state, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, state); - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - - PvtcpHoldSock(pvsk); - - /* Not implemented yet. */ - - (void)sock; - pvsk->rpcStatus = -ENOIOCTLCMD; - - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - PvskSetOpFlag(pvsk, PVTCP_OP_IOCTL); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Marks a socket for listening to incoming connections - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back - */ - -void -PvtcpListenOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - int backlog = (int)packet->data32; - - PvtcpHoldSock(pvsk); - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - - pvsk->err = -kernel_listen(sk->sk_socket, backlog); - PvskSetOpFlag(pvsk, PVTCP_OP_LISTEN); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Accepts a connected socket - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is 
scheduled, which will send reply back. - */ - -void -PvtcpAcceptOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - int rc; - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - struct socket *newsock = NULL; - - PvtcpHoldSock(pvsk); - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - - rc = kernel_accept(sk->sk_socket, &newsock, O_NONBLOCK); - if (rc == 0) { - rc = SockAllocInit(newsock, channel, packet->data64ex, pvsk); - if (rc) { - SockReleaseWrapper(newsock); - } - } - - if (rc == 0) { - struct sock *newsk = newsock->sk; - PvtcpSock *newpvsk = PvskFromSk(newsk); - - /* We temporarily use the state field to cache parent socket. */ - - newpvsk->state = (PvtcpState *)pvsk; - PvskSetOpFlag(newpvsk, PVTCP_OP_ACCEPT); - PvtcpSchedSock(newpvsk); - } else { - pvsk->err = -rc; - PvskSetOpFlag(pvsk, PVTCP_OP_ACCEPT); - PvtcpSchedSock(pvsk); - } - - PvtcpPutSock(pvsk); -} - - -/** - * @brief Connects an offload socket to given address - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect A writer task is scheduled, which will send reply back - */ - -void -PvtcpConnectOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - struct sock *sk = SkFromPvsk(pvsk); - struct sockaddr *addr; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - int addrLen; - int flags = 0; - int rc = 0; - int disconnect = 0; - - PvtcpHoldSock(pvsk); - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - - if (sk->sk_family == AF_INET) { - addr = (struct sockaddr *)&sin; - addrLen = sizeof sin; - memset(&sin, 0, sizeof sin); - sin.sin_port = 
packet->data16; - sin.sin_addr.s_addr = (unsigned int)packet->data64ex; - if (COMM_OPF_GET_VAL(packet->flags)) { - sin.sin_family = AF_UNSPEC; - disconnect = 1; - goto connect; - } - sin.sin_family = AF_INET; - PvtcpTestAndBindLoopbackInet4(pvsk, &sin.sin_addr.s_addr, 0); - } else { /* AF_INET6 */ - addr = (struct sockaddr *)&sin6; - addrLen = sizeof sin6; - memset(&sin6, 0, sizeof sin6); - sin6.sin6_port = packet->data16; - if (COMM_OPF_GET_VAL(packet->flags)) { - sin6.sin6_family = AF_UNSPEC; - PvtcpI6AddrUnpack(&sin6.sin6_addr.s6_addr32[0], - packet->data64ex, packet->data64ex2); - disconnect = 1; - goto connect; - } - sin6.sin6_family = AF_INET6; - PvtcpTestAndBindLoopbackInet6(pvsk, &packet->data64ex, - &packet->data64ex2, 0); - PvtcpI6AddrUnpack(&sin6.sin6_addr.s6_addr32[0], - packet->data64ex, packet->data64ex2); - } - -connect: - rc = kernel_connect(sk->sk_socket, addr, addrLen, flags | O_NONBLOCK); - - /* - * For datagram sockets, ErrorReportCB is not called, so we need to - * explicitly set the pvsk error to be returned back to the guest. - * This should not be used on SOCK_STREAM sockets. You have been - * warned. - */ - - if (rc && (sk->sk_socket->type == SOCK_DGRAM)) { - pvsk->err = -rc; - } - - /* - * Quite likely, stream actual connect requests will set err to EINPROGRESS. - * That's fine, error_report will trigger an AIO/flow-op reply. When the - * connection is established, state_change schedules an AIO/connect reply. - * Record whether the request was a disconnect. 
- */ - - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_DISCONNECT, disconnect); - PvskSetOpFlag(pvsk, PVTCP_OP_CONNECT); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/** - * @brief Initiates socket shutdown on an offload socket - * @param channel communication channel with offloader - * @param upperLayerState state associated with this channel - * @param packet first packet received in reply - * @param vec payload buffer descriptors - * @param vecLen payload buffer descriptor count - * @sideeffect Socket queue will be drained and socket shutdown performed. - */ - -void -PvtcpShutdownOp(CommChannel channel, - void *upperLayerState, - CommPacket *packet, - struct kvec *vec, - unsigned int vecLen) -{ - PvtcpSock *pvsk = PvtcpGetPvskOrReturn(packet->data64, upperLayerState); - int how = (int)packet->data32; - - PvtcpHoldSock(pvsk); - if ((how == SHUT_RD) || (how == SHUT_RDWR)) { - kernel_sock_shutdown(SkFromPvsk(pvsk)->sk_socket, SHUT_RD); - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_RD, 1); - } - if ((how == SHUT_WR) || (how == SHUT_RDWR)) { - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_SHUT_WR, 1); - } - PVTCP_UNLOCK_DISP_DISCARD_VEC(); - PvtcpSchedSock(pvsk); - PvtcpPutSock(pvsk); -} - - -/* - * AIO functions called from the main AIO processing function. - * Most of these functions complete processing initiated by the corresponding - * offload operations above. - */ - -/** - * @brief Processes socket release in an AIO thread. This function is - * called with the socket 'in' lock taken. - * @param[in,out] pvsk socket to release. - * @sideeffect the socket will be released upon return from this function. 
- */ - -static inline void -ReleaseAIO(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - CommPacket packet = { - .len = sizeof packet, - .flags = 0, - .opCode = PVTCP_OP_RELEASE, - .data64 = pvsk->peerSock, - .data64ex = PvtcpGetHandle(pvsk) - }; - unsigned long long timeout = COMM_MAX_TO; - - SOCK_OUT_LOCK(pvsk); - CommSvc_Write(pvsk->channel, &packet, &timeout); -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Sent 'Release' [0x%p] -> 0x%0x] reply.\n", - __FUNCTION__, pvsk, (unsigned)(pvsk->peerSock))); -#endif - /* - * 'sk' goes away in the final ProcessAIO::sock_put() - */ - SockReleaseWrapper(sock); - SOCK_OUT_UNLOCK(pvsk); - - PvtcpStateRemoveSocket(pvsk->channel, pvsk); -} - - -/** - * @brief Processes socket create reply in an AIO thread. This function is - * called with the socket 'in' lock taken. - * @param[in,out] pvsk newly created socket to send ack for. - */ - -static inline void -CreateAIO(PvtcpSock *pvsk) -{ - struct sock *sk; - struct socket *sock; - CommPacket packet = { - .len = sizeof packet, - .flags = 0, - .opCode = PVTCP_OP_CREATE, - .data64 = pvsk->peerSock, - }; - unsigned long long timeout = COMM_MAX_TO; - int rc; - - sk = SkFromPvsk(pvsk); - if (!sk) { - /* - * This is a create-error socket. The error reply has been sent out - * already, by PvtcpFlowAIO(). This is a paranoid safety measure, as - * PVTCP_OP_CREATE OpFlag should not have been set. - */ - - return; - } - - sock = sk->sk_socket; - packet.data64ex = PvtcpGetHandle(pvsk); - - rc = CommSvc_Write(pvsk->channel, &packet, &timeout); - if (rc != packet.len) { - /* We mustn't leak it if PV can't get a hold of it. 
*/ - - PvtcpStateRemoveSocket(pvsk->channel, pvsk); - SockReleaseWrapper(sock); - CommOS_Log(("%s: BOOG -- Couldn't send 'Create' reply [0x%p]!\n", - __FUNCTION__, sk)); - } else { -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Sent 'Create' [0x%p] reply [%d].\n", - __FUNCTION__, pvsk, rc)); -#endif - } -} - - -/** - * @brief Processes socket bind in an AIO thread. This function is - * called with the socket 'in' lock taken. - * @param[in,out] pvsk socket being bound. - */ - -static inline void -BindAIO(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - CommPacket packet = { - .len = sizeof packet, - .flags = 0, - .opCode = PVTCP_OP_BIND, - .data64 = pvsk->peerSock - }; - unsigned long long timeout = COMM_MAX_TO; - int rc; - - if (pvsk->peerSockSet) { - if (sk->sk_family == AF_INET) { - struct sockaddr_in sin = { .sin_family = AF_INET }; - int addrLen = sizeof sin; - - rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &addrLen); - if (rc == 0) { - packet.data16 = sin.sin_port; - PvtcpResetLoopbackInet4(pvsk, &sin.sin_addr.s_addr); - packet.data64ex = (unsigned long long)sin.sin_addr.s_addr; - } - } else { /* AF_INET6 */ - struct sockaddr_in6 sin = { .sin6_family = AF_INET6 }; - int addrLen = sizeof sin; - - rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &addrLen); - if (rc == 0) { - packet.data16 = sin.sin6_port; - PvtcpResetLoopbackInet6(pvsk, &sin.sin6_addr); - PvtcpI6AddrPack(&sin.sin6_addr.s6_addr32[0], - &packet.data64ex, &packet.data64ex2); - } - } - - if (rc) { - COMM_OPF_SET_ERR(packet.flags); - packet.data32ex = (unsigned int)(-rc); - packet.opCode = PVTCP_OP_FLOW; - } - CommSvc_Write(pvsk->channel, &packet, &timeout); -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Sent 'Bind' [0x%p, %d] reply.\n", - __FUNCTION__, pvsk, rc)); -#endif - } -} - - -/** - * @brief Sends result of setsockopt back to guest. - * called with the socket 'in' lock taken. 
- * @param[in,out] pvsk socket that was modified. - */ - -static inline void -SetSockOptAIO(PvtcpSock *pvsk) -{ - CommPacket packet; - unsigned long long timeout; - - packet.len = sizeof packet; - packet.flags = 0; - packet.opCode = PVTCP_OP_SETSOCKOPT; - packet.data64 = pvsk->peerSock; - packet.data32 = (unsigned int)(pvsk->rpcStatus); - timeout = COMM_MAX_TO; - CommSvc_Write(pvsk->channel, &packet, &timeout); - pvsk->rpcStatus = 0; -} - - -/** - * @brief Sends result of getsockopt back to guest. - * called with the socket 'in' lock taken. - * @param[in,out] pvsk socket that was modified. - */ - -static inline void -GetSockOptAIO(PvtcpSock *pvsk) -{ - CommPacket packet = { - .opCode = PVTCP_OP_GETSOCKOPT, - .flags = 0 - }; - unsigned long long timeout = COMM_MAX_TO; - - struct kvec vec[1]; - struct kvec *inVec = vec; - unsigned int vecLen = 1; - unsigned int iovOffset = 0; - - if (pvsk->rpcStatus > 0) { - packet.len = sizeof packet + pvsk->rpcStatus; - vec[0].iov_base = pvsk->rpcReply; - vec[0].iov_len = pvsk->rpcStatus; - } else { - vecLen = 0; - } - - packet.data64 = pvsk->peerSock; - packet.data32 = pvsk->rpcStatus; - - CommSvc_WriteVec(pvsk->channel, &packet, &inVec, &vecLen, - &timeout, &iovOffset); - - if (pvsk->rpcReply) { - CommOS_Kfree(pvsk->rpcReply); - pvsk->rpcReply = NULL; - } - pvsk->rpcStatus = 0; -} - - -/** - * @brief Sends result of ioctl back to guest. - * called with the socket 'in' lock taken. - * @param[in,out] pvsk socket that was modified. - */ - -static inline void -IoctlAIO(PvtcpSock *pvsk) -{ - CommPacket packet = { - .len = sizeof packet, - .opCode = PVTCP_OP_IOCTL, - .flags = 0 - }; - unsigned long long timeout = COMM_MAX_TO; - - packet.data64 = pvsk->peerSock; - packet.data32 = pvsk->rpcStatus; - CommSvc_Write(pvsk->channel, &packet, &timeout); - pvsk->rpcStatus = 0; -} - - -/** - * @brief Processes socket listen reply in an AIO thread. This function is - * called with the socket 'in' lock taken. 
- * @param[in,out] pvsk socket being put in listen mode. - */ - -static inline void -ListenAIO(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - CommPacket packet = { - .len = sizeof packet, - .flags = 0, - .opCode = PVTCP_OP_LISTEN, - .data64 = pvsk->peerSock - }; - unsigned long long timeout = COMM_MAX_TO; - - if (pvsk->peerSockSet) { - if (sk->sk_state != TCP_LISTEN) { - COMM_OPF_SET_ERR(packet.flags); - packet.data32ex = (unsigned int)pvsk->err; - packet.opCode = PVTCP_OP_FLOW; - } - - CommSvc_Write(pvsk->channel, &packet, &timeout); -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Sent 'Listen' [0x%p] reply.\n", __FUNCTION__, pvsk)); -#endif - } -} - - -/** - * @brief Processes socket accept reply in an AIO thread. This function is - * called with the socket 'in' lock taken. - * @param[in,out] pvsk new socket or socket to accept on (see PvtcpAcceptOp). - */ - -static inline void -AcceptAIO(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - CommPacket packet = { - .len = sizeof packet, - .flags = 0, - .opCode = PVTCP_OP_ACCEPT - }; - unsigned long long timeout = COMM_MAX_TO; - const int enable = 1; - int rc; - - if (pvsk->peerSockSet) { - unsigned long long payloadSocks[2] = { 0, 0 }; - struct kvec payloadVec[] = { - { .iov_base = &payloadSocks, .iov_len = sizeof payloadSocks } - }; - struct kvec *payload = payloadVec; - unsigned int payloadLen = 1; - unsigned int iovOffset = 0; - - packet.len = sizeof packet + sizeof payloadSocks; - - /* - * accept() succeeded, so this is the child socket; its state field - * was temporarily changed to hold the parent/accepting socket. - * The newly accepted socket and its peer need to be put in a - * payload since we use up all available header fields with - * addressing information. Finally, the state field is restored. 
- */ - - packet.data64 = ((PvtcpSock *)pvsk->state)->peerSock; - pvsk->state = CommSvc_GetState(pvsk->channel); - - payloadSocks[0] = pvsk->peerSock; - payloadSocks[1] = PvtcpGetHandle(pvsk); - - rc = 0; - if (sk->sk_family == AF_INET) { - struct sockaddr_in sin = { .sin_family = AF_INET }; - int addrLen = sizeof sin; - - rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &addrLen); - if (rc == 0) { - packet.data16 = sin.sin_port; - PvtcpResetLoopbackInet4(pvsk, &sin.sin_addr.s_addr); - packet.data64ex = (unsigned long long)sin.sin_addr.s_addr; - } - } else { /* AF_INET6 */ - struct sockaddr_in6 sin = { .sin6_family = AF_INET6 }; - int addrLen = sizeof sin; - - rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &addrLen); - if (rc == 0) { - packet.data16 = sin.sin6_port; - PvtcpResetLoopbackInet6(pvsk, &sin.sin6_addr); - PvtcpI6AddrPack(&sin.sin6_addr.s6_addr32[0], - &packet.data64ex, &packet.data64ex2); - } - } - - if (rc == 0) { - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (void *)&enable, sizeof enable); - kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (void *)&enable, sizeof enable); - kernel_setsockopt(sock, SOL_SOCKET, SO_OOBINLINE, - (void *)&enable, sizeof enable); - } else { - PvtcpStateRemoveSocket(pvsk->channel, pvsk); - SockReleaseWrapper(sock); - COMM_OPF_SET_ERR(packet.flags); - packet.data32ex = (unsigned int)ECONNABORTED; - packet.len = sizeof packet; - packet.opCode = PVTCP_OP_FLOW; - } - - rc = CommSvc_WriteVec(pvsk->channel, &packet, - &payload, &payloadLen, &timeout, &iovOffset); - if ((rc != packet.len) && !COMM_OPF_TEST_ERR(packet.flags)) { - /* Mustn't leak the new socket if PV can't get a hold of it. */ - - PvtcpStateRemoveSocket(pvsk->channel, pvsk); - SockReleaseWrapper(sock); - } -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Sent 'Accept' [0x%p] reply.\n", __FUNCTION__, pvsk)); -#endif - } -} - - -/** - * @brief Processes socket connect in an AIO thread. This function is - * called with the socket 'in' lock taken. 
- * @param[in,out] pvsk socket being connected. - */ - -static inline void -ConnectAIO(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - struct socket *sock = sk->sk_socket; - CommPacket packet = { - .len = sizeof packet, - .flags = 0, - .opCode = PVTCP_OP_CONNECT, - .data64 = pvsk->peerSock - }; - unsigned long long timeout = COMM_MAX_TO; - const int enable = 1; - int rc; - - if (!pvsk->peerSockSet || - (!PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_DISCONNECT) && - (sk->sk_state != TCP_ESTABLISHED))) { - return; - } - - if (PvskTestFlag(pvsk, PVTCP_OFF_PVSKF_DISCONNECT)) { - COMM_OPF_SET_VAL(packet.flags, 1); - PvskSetFlag(pvsk, PVTCP_OFF_PVSKF_DISCONNECT, 0); - } else if (sk->sk_state == TCP_ESTABLISHED) { - if (sk->sk_family == AF_INET) { - struct sockaddr_in sin = { .sin_family = AF_INET }; - int addrLen = sizeof sin; - - rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &addrLen); - if (rc == 0) { - packet.data16 = sin.sin_port; - PvtcpResetLoopbackInet4(pvsk, &sin.sin_addr.s_addr); - packet.data64ex = (unsigned long long)sin.sin_addr.s_addr; - } - } else { /* AF_INET6 */ - struct sockaddr_in6 sin = { .sin6_family = AF_INET6 }; - int addrLen = sizeof sin; - - rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &addrLen); - if (rc == 0) { - packet.data16 = sin.sin6_port; - PvtcpResetLoopbackInet6(pvsk, &sin.sin6_addr); - PvtcpI6AddrPack(&sin.sin6_addr.s6_addr32[0], - &packet.data64ex, &packet.data64ex2); - } - } - - if (rc == 0) { - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (void *)&enable, sizeof enable); - kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (void *)&enable, sizeof enable); - kernel_setsockopt(sock, SOL_SOCKET, SO_OOBINLINE, - (void *)&enable, sizeof enable); - } else { - COMM_OPF_SET_ERR(packet.flags); - packet.data32ex = ECONNABORTED; - packet.opCode = PVTCP_OP_FLOW; - } - } - - CommSvc_Write(pvsk->channel, &packet, &timeout); -#if defined(PVTCP_FULL_DEBUG) - CommOS_Debug(("%s: Sent 'Connect' [0x%p] reply.\n", __FUNCTION__, 
pvsk)); -#endif -} - - -/** - * @brief Server side main asynchronous processing function. It writes to - * socket queued output buffers, it reads from socket and outputs to PV; it - * also completes operation processing and sends applicable replies to PV. - * Finally, processes error reporting and delta size acks. - * @param arg socket work item. - */ - -void -PvtcpProcessAIO(CommOSWork *arg) -{ - PvtcpSock *pvsk = container_of(arg, PvtcpSock, work); - struct sock *sk = SkFromPvsk(pvsk); - - if (!SOCK_OUT_TRYLOCK(pvsk)) { - /* - * Queued output processing. If trylock failed, we don't retry. - * There are only two reasons for not being able to take the lock: - * - IoOp() has it -- when done, it reschedules us if we're not running. - * - OutputAIO() is already running on another core. - */ - - if (sk && sk->sk_socket) { - PvtcpOutputAIO(pvsk); - } - SOCK_OUT_UNLOCK(pvsk); - } - - /* All other processing needs the socket IN lock. */ - - if (!SOCK_IN_TRYLOCK(pvsk)) { - - if (sk && sk->sk_socket) { - int err; - - /* Input processing. */ - - /* - * Workqueue handlers are pinned to a CPU core and therefore not - * migratable. No need to disable preemption. - */ - err = PvtcpInputAIO(pvsk, perCpuBuf[smp_processor_id()]); - - /* Error and ack notifications. */ - - PvtcpFlowAIO(pvsk, err); - - if (!pvsk->opFlags) { - /* No other operations need to be completed. */ - - goto doneInUnlock; - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_RELEASE)) { - PvskResetOpFlag(pvsk, PVTCP_OP_RELEASE); - ReleaseAIO(pvsk); - - /* All possible in-flight operations must be dropped. */ - goto doneInUnlock; - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_CREATE)) { - /* No state locking required. 
*/ - - PvskResetOpFlag(pvsk, PVTCP_OP_CREATE); - CreateAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_BIND)) { - PvskResetOpFlag(pvsk, PVTCP_OP_BIND); - BindAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_SETSOCKOPT)) { - PvskResetOpFlag(pvsk, PVTCP_OP_SETSOCKOPT); - SetSockOptAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_GETSOCKOPT)) { - PvskResetOpFlag(pvsk, PVTCP_OP_GETSOCKOPT); - GetSockOptAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_IOCTL)) { - PvskResetOpFlag(pvsk, PVTCP_OP_IOCTL); - IoctlAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_LISTEN)) { - PvskResetOpFlag(pvsk, PVTCP_OP_LISTEN); - ListenAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_ACCEPT)) { - PvskResetOpFlag(pvsk, PVTCP_OP_ACCEPT); - AcceptAIO(pvsk); - } - - if (PvskTestOpFlag(pvsk, PVTCP_OP_CONNECT)) { - PvskResetOpFlag(pvsk, PVTCP_OP_CONNECT); - ConnectAIO(pvsk); - } - -doneInUnlock: - SOCK_IN_UNLOCK(pvsk); - } else { - /* - * Special case for error sockets which don't have a sk. - * Note that this socket was created by SockAllocErrInit() and so - * no 'real' socket sits atop it and is not present on any state - * netif list. The socket has a refcnt of one and it will get - * deallocated by the PvtcpPutSock() call below, so we don't need - * to unlock it. - */ - - PvtcpFlowAIO(pvsk, -ENETDOWN); - } - } else { - if ((pvsk->peerSockSet || PvskTestOpFlag(pvsk, PVTCP_OP_RELEASE)) && - sk && sk->sk_socket) { - PvtcpSchedSock(pvsk); - } - } - - PvtcpPutSock(pvsk); -} diff --git a/arch/arm/mvp/pvtcpkm/pvtcp_off_linux.h b/arch/arm/mvp/pvtcpkm/pvtcp_off_linux.h deleted file mode 100644 index 34992da..0000000 --- a/arch/arm/mvp/pvtcpkm/pvtcp_off_linux.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief Linux Offload definitions. - * This file is only meant to be included via pvtcp_off.h. - */ - -#ifndef _PVTCP_OFF_LINUX_H_ -#define _PVTCP_OFF_LINUX_H_ - -#include <linux/socket.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/tcp.h> -#include <linux/in.h> -#include <linux/in6.h> -#include <linux/skbuff.h> -#include <linux/random.h> -#include <linux/fs.h> -#include <linux/cred.h> - - -typedef struct PvtcpSock { - struct sock *sk; - PVTCP_SOCK_COMMON_FIELDS; - PVTCP_OFF_SOCK_COMMON_FIELDS; - void (*destruct)(struct sock *sk); - void (*stateChange)(struct sock *sk); - void (*dataReady)(struct sock *sk, int bytes); - void (*writeSpace)(struct sock *sk); - void (*errorReport)(struct sock *sk); -} PvtcpSock; - - -typedef enum PvtcpSockNamespace { - PVTCP_SOCK_NAMESPACE_INITIAL, - PVTCP_SOCK_NAMESPACE_CHANNEL -} PvtcpSockNamespace; - - -/* Number of large datagram allocations. */ -extern unsigned long long pvtcpOffDgramAllocations; - -/* Inet4 loopback addresses. */ -extern unsigned int pvtcpLoopbackOffAddr; - -/* Get the 'struct sock' from a PvtcpSock. */ -#define SkFromPvsk(pvsk) ((pvsk)->sk) - -/* Get the PvtcpSock from a 'struct sock'. 
*/ -#define PvskFromSk(sk) ((PvtcpSock *)(sk)->sk_user_data) - -int -PvtcpTestAndBindLoopbackInet4(PvtcpSock *pvsk, - unsigned int *addr, - unsigned short port); -int -PvtcpTestAndBindLoopbackInet6(PvtcpSock *pvsk, - unsigned long long *addr0, - unsigned long long *addr1, - unsigned short port); - -void PvtcpResetLoopbackInet4(PvtcpSock *pvsk, unsigned int *addr); -void PvtcpResetLoopbackInet6(PvtcpSock *pvsk, struct in6_addr *in6); - -void PvtcpFlowAIO(PvtcpSock *pvsk, int eof); -void PvtcpOutputAIO(PvtcpSock *pvsk); -int PvtcpInputAIO(PvtcpSock *pvsk, void *perCpuBuf); - - -/** - * @brief Switches a socket to the channel, or the initial name space. - * @param pvsk socket to switch. - * @param ns which namespace to switch to. - */ - -static inline void -PvtcpSwitchSock(PvtcpSock *pvsk, - PvtcpSockNamespace ns) -{ -#if defined(CONFIG_NET_NS) && !defined(PVTCP_NET_NS_DISABLE) - struct sock *sk; - struct net *prevNet; - - if (!pvsk) { - return; - } - sk = SkFromPvsk(pvsk); - if (!sk) { - /* If this is a phony, create fail reporting pvsk, just return. */ - - return; - } - - prevNet = sock_net(sk); - switch (ns) { - case PVTCP_SOCK_NAMESPACE_INITIAL: - sock_net_set(sk, get_net(&init_net)); - break; - case PVTCP_SOCK_NAMESPACE_CHANNEL: - sock_net_set(sk, get_net(pvsk->state->namespace)); - break; - } - put_net(prevNet); -#endif -} - - -/** - * @brief Tests whether a socket has an explicit namespace. - * @param pvsk socket to test. - * @return 1 if the socket has a namespace, 0 otherwise. - */ - -static inline int -PvtcpHasSockNamespace(PvtcpSock *pvsk) -{ -#if defined(CONFIG_NET_NS) && !defined(PVTCP_NET_NS_DISABLE) - struct sock *sk; - int rc = 0; - - if (!pvsk) { - return rc; - } - sk = SkFromPvsk(pvsk); - if (!sk) { - /* If this is a phony, create fail reporting pvsk, just return 0. */ - - return rc; - } - - rc = (sock_net(sk) != &init_net); - return rc; -#else - return 0; -#endif -} - - -/** - * @brief Retains the pvsock's underlying socket. 
- * @param pvsk socket to retain. - */ - -static inline void -PvtcpHoldSock(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - - if (likely(sk)) { - sock_hold(sk); - } -} - - -/** - * @brief Releases a hold on the pvsock's underlying socket. If the underlying - * socket is NULL, this is an error socket and we deallocate it. - * @param pvsk socket to release hold on. - */ - -static inline void -PvtcpPutSock(PvtcpSock *pvsk) -{ - struct sock *sk = SkFromPvsk(pvsk); - - if (likely(sk)) { - sock_put(sk); - } else { - /* - * This is an error socket, which does _not_ have an underlying socket. - * We simply need to free it. - */ - - CommOS_Kfree(pvsk); - } -} - - -/** - * @brief Schedules an offload socket for AIO. - * @param pvsk socket to schedule. - * @sideeffect the socket will be processed by AIO threads. - */ - -static inline void -PvtcpSchedSock(PvtcpSock *pvsk) -{ - /* - * We must hold the socket before we enqueue it for AIO, such that it may - * not be released while in the workqueue. If CommSvc_ScheduleAIOWork() - * returned non-zero, it means the socket had already been enqueued. In - * that case, we release the hold. Otherwise, the hold is released by the - * AIO function (PvtcpProcessAIO()). - * Note that error pv sockets may only originate from synchronized RPCs, - * or to be more precise, from PvtcpCreateOp(), and not from IO processing; - * this means that they cannot be attempted to be enqueued more than once. - */ - - PvtcpHoldSock(pvsk); - if (CommSvc_ScheduleAIOWork(&pvsk->work)) { - PvtcpPutSock(pvsk); - } -} - -#endif // _PVTCP_OFF_LINUX_H_ diff --git a/arch/arm/mvp/pvtcpkm/pvtcp_off_linux_shim.S b/arch/arm/mvp/pvtcpkm/pvtcp_off_linux_shim.S deleted file mode 100644 index 824286b..0000000 --- a/arch/arm/mvp/pvtcpkm/pvtcp_off_linux_shim.S +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Linux 2.6.32 and later Kernel module for VMware MVP PVTCP Server - * - * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; see the file COPYING. If not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#line 5 - -/** - * @file - * - * @brief PVTCP socket destructor shim. - * - * The module reference accounting code for socket destruction in the core - * Linux kernel does not know about PVTCP sockets, so it does not properly - * increment/decrement the reference count on pvtcpkm when calling through a - * function pointer into our destructor. If a module unload is requested on - * pvtcpkm while a socket is being destroyed, it is possible for the destructor - * to be preempted after decrementing the module reference count but before - * returning to the core kernel. If the module code is unmapped before the - * function return, it is possible that we will attempt to execute unmapped - * code, resulting in a host crash. - * - * This shim proxies socket destruction requests through to the PVTCP socket - * destructor, then jumps directly to module_put to drop the reference count. - * module_put will return directly to the caller, eliminating the race. - */ - -.text -.p2align 4 - -.global asmDestructorShim - -/** - * @brief Socket destructor callback. Calls into pvtcpkm to destroy a socket - * and then decrements the refcount. 
- * @param r0 pointer to struct sock - */ - -asmDestructorShim: - push {lr} - ldr r1, targetAddr @ Destroy socket - blx r1 - pop {lr} - cmp r0, #0 - bxne lr @ We shouldn't module_put, just return. - ldr r0, owner - ldr r1, modulePutAddr @ Jump to module_put. module_put - bx r1 @ returns directly to caller - -owner: - .word __this_module - -targetAddr: - .word DestructCB - -modulePutAddr: - .word module_put |