diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-03-21 12:08:21 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-03-21 12:08:21 -0500 |
commit | ac58c9059da8886b5e8cde012a80266b18ca146e (patch) | |
tree | 40bf486843a2cace6c3a959d73423e50e6aa0c00 | |
parent | df6db302cb236ac3a683d535a3e2073d9f4b2833 (diff) | |
parent | c4a1745aa09fc110afdefea0e5d025043e348bae (diff) | |
download | kernel_samsung_smdk4412-ac58c9059da8886b5e8cde012a80266b18ca146e.zip kernel_samsung_smdk4412-ac58c9059da8886b5e8cde012a80266b18ca146e.tar.gz kernel_samsung_smdk4412-ac58c9059da8886b5e8cde012a80266b18ca146e.tar.bz2 |
Merge branch 'linus'
318 files changed, 20896 insertions, 9982 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 1c95588..2975291 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -9,7 +9,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ procfs-guide.xml writing_usb_driver.xml \ - sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \ + kernel-api.xml journal-api.xml lsm.xml usb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml ### diff --git a/Documentation/DocBook/sis900.tmpl b/Documentation/DocBook/sis900.tmpl deleted file mode 100644 index 6c2cbac..0000000 --- a/Documentation/DocBook/sis900.tmpl +++ /dev/null @@ -1,585 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" - "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> - -<book id="SiS900Guide"> - -<bookinfo> - -<title>SiS 900/7016 Fast Ethernet Device Driver</title> - -<authorgroup> -<author> -<firstname>Ollie</firstname> -<surname>Lho</surname> -</author> - -<author> -<firstname>Lei Chun</firstname> -<surname>Chang</surname> -</author> -</authorgroup> - -<edition>Document Revision: 0.3 for SiS900 driver v1.06 & v1.07</edition> -<pubdate>November 16, 2000</pubdate> - -<copyright> - <year>1999</year> - <holder>Silicon Integrated System Corp.</holder> -</copyright> - -<legalnotice> - <para> - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - </para> - - <para> - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - </para> - - <para> - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - </para> -</legalnotice> - -<abstract> -<para> -This document gives some information on installation and usage of SiS 900/7016 -device driver under Linux. -</para> -</abstract> - -</bookinfo> - -<toc></toc> - -<chapter id="intro"> - <title>Introduction</title> - -<para> -This document describes the revision 1.06 and 1.07 of SiS 900/7016 Fast Ethernet -device driver under Linux. The driver is developed by Silicon Integrated -System Corp. and distributed freely under the GNU General Public License (GPL). -The driver can be compiled as a loadable module and used under Linux kernel -version 2.2.x. (rev. 1.06) -With minimal changes, the driver can also be used under 2.3.x and 2.4.x kernel -(rev. 1.07), please see -<xref linkend="install"/>. If you are intended to -use the driver for earlier kernels, you are on your own. -</para> - -<para> -The driver is tested with usual TCP/IP applications including -FTP, Telnet, Netscape etc. and is used constantly by the developers. -</para> - -<para> -Please send all comments/fixes/questions to -<ulink url="mailto:lcchang@sis.com.tw">Lei-Chun Chang</ulink>. -</para> -</chapter> - -<chapter id="changes"> - <title>Changes</title> - -<para> -Changes made in Revision 1.07 - -<orderedlist> -<listitem> -<para> -Separation of sis900.c and sis900.h in order to move most -constant definition to sis900.h (many of those constants were -corrected) -</para> -</listitem> - -<listitem> -<para> -Clean up PCI detection, the pci-scan from Donald Becker were not used, -just simple pci_find_*. -</para> -</listitem> - -<listitem> -<para> -MII detection is modified to support multiple mii transceiver. -</para> -</listitem> - -<listitem> -<para> -Bugs in read_eeprom, mdio_* were removed. -</para> -</listitem> - -<listitem> -<para> -Lot of sis900 irrelevant comments were removed/changed and -more comments were added to reflect the real situation. -</para> -</listitem> - -<listitem> -<para> -Clean up of physical/virtual address space mess in buffer -descriptors. -</para> -</listitem> - -<listitem> -<para> -Better transmit/receive error handling. -</para> -</listitem> - -<listitem> -<para> -The driver now uses zero-copy single buffer management -scheme to improve performance. -</para> -</listitem> - -<listitem> -<para> -Names of variables were changed to be more consistent. -</para> -</listitem> - -<listitem> -<para> -Clean up of auo-negotiation and timer code. -</para> -</listitem> - -<listitem> -<para> -Automatic detection and change of PHY on the fly. -</para> -</listitem> - -<listitem> -<para> -Bug in mac probing fixed. -</para> -</listitem> - -<listitem> -<para> -Fix 630E equalier problem by modifying the equalizer workaround rule. -</para> -</listitem> - -<listitem> -<para> -Support for ICS1893 10/100 Interated PHYceiver. -</para> -</listitem> - -<listitem> -<para> -Support for media select by ifconfig. -</para> -</listitem> - -<listitem> -<para> -Added kernel-doc extratable documentation. -</para> -</listitem> - -</orderedlist> -</para> -</chapter> - -<chapter id="tested"> - <title>Tested Environment</title> - -<para> -This driver is developed on the following hardware - -<itemizedlist> -<listitem> - -<para> -Intel Celeron 500 with SiS 630 (rev 02) chipset -</para> -</listitem> -<listitem> - -<para> -SiS 900 (rev 01) and SiS 7016/7014 Fast Ethernet Card -</para> -</listitem> - -</itemizedlist> - -and tested with these software environments - -<itemizedlist> -<listitem> - -<para> -Red Hat Linux version 6.2 -</para> -</listitem> -<listitem> - -<para> -Linux kernel version 2.4.0 -</para> -</listitem> -<listitem> - -<para> -Netscape version 4.6 -</para> -</listitem> -<listitem> - -<para> -NcFTP 3.0.0 beta 18 -</para> -</listitem> -<listitem> - -<para> -Samba version 2.0.3 -</para> -</listitem> - -</itemizedlist> - -</para> - -</chapter> - -<chapter id="files"> -<title>Files in This Package</title> - -<para> -In the package you can find these files: -</para> - -<para> -<variablelist> - -<varlistentry> -<term>sis900.c</term> -<listitem> -<para> -Driver source file in C -</para> -</listitem> -</varlistentry> - -<varlistentry> -<term>sis900.h</term> -<listitem> -<para> -Header file for sis900.c -</para> -</listitem> -</varlistentry> - -<varlistentry> -<term>sis900.sgml</term> -<listitem> -<para> -DocBook SGML source of the document -</para> -</listitem> -</varlistentry> - -<varlistentry> -<term>sis900.txt</term> -<listitem> -<para> -Driver document in plain text -</para> -</listitem> -</varlistentry> - -</variablelist> -</para> -</chapter> - -<chapter id="install"> - <title>Installation</title> - -<para> -Silicon Integrated System Corp. is cooperating closely with core Linux Kernel -developers. The revisions of SiS 900 driver are distributed by the usuall channels -for kernel tar files and patches. Those kernel tar files for official kernel and -patches for kernel pre-release can be download at -<ulink url="http://ftp.kernel.org/pub/linux/kernel/">official kernel ftp site</ulink> -and its mirrors. -The 1.06 revision can be found in kernel version later than 2.3.15 and pre-2.2.14, -and 1.07 revision can be found in kernel version 2.4.0. -If you have no prior experience in networking under Linux, please read -<ulink url="http://www.tldp.org/">Ethernet HOWTO</ulink> and -<ulink url="http://www.tldp.org/">Networking HOWTO</ulink> available from -Linux Documentation Project (LDP). -</para> - -<para> -The driver is bundled in release later than 2.2.11 and 2.3.15 so this -is the most easy case. -Be sure you have the appropriate packages for compiling kernel source. -Those packages are listed in Document/Changes in kernel source -distribution. If you have to install the driver other than those bundled -in kernel release, you should have your driver file -<filename>sis900.c</filename> and <filename>sis900.h</filename> -copied into <filename class="directory">/usr/src/linux/drivers/net/</filename> first. -There are two alternative ways to install the driver -</para> - -<sect1> -<title>Building the driver as loadable module</title> - -<para> -To build the driver as a loadable kernel module you have to reconfigure -the kernel to activate network support by -</para> - -<para><screen> -make menuconfig -</screen></para> - -<para> -Choose <quote>Loadable module support ---></quote>, -then select <quote>Enable loadable module support</quote>. -</para> - -<para> -Choose <quote>Network Device Support ---></quote>, select -<quote>Ethernet (10 or 100Mbit)</quote>. -Then select <quote>EISA, VLB, PCI and on board controllers</quote>, -and choose <quote>SiS 900/7016 PCI Fast Ethernet Adapter support</quote> -to <quote>M</quote>. -</para> - -<para> -After reconfiguring the kernel, you can make the driver module by -</para> - -<para><screen> -make modules -</screen></para> - -<para> -The driver should be compiled with no errors. After compiling the driver, -the driver can be installed to proper place by -</para> - -<para><screen> -make modules_install -</screen></para> - -<para> -Load the driver into kernel by -</para> - -<para><screen> -insmod sis900 -</screen></para> - -<para> -When loading the driver into memory, some information message can be view by -</para> - -<para> -<screen> -dmesg -</screen> - -or - -<screen> -cat /var/log/message -</screen> -</para> - -<para> -If the driver is loaded properly you will have messages similar to this: -</para> - -<para><screen> -sis900.c: v1.07.06 11/07/2000 -eth0: SiS 900 PCI Fast Ethernet at 0xd000, IRQ 10, 00:00:e8:83:7f:a4. -eth0: SiS 900 Internal MII PHY transceiver found at address 1. -eth0: Using SiS 900 Internal MII PHY as default -</screen></para> - -<para> -showing the version of the driver and the results of probing routine. -</para> - -<para> -Once the driver is loaded, network can be brought up by -</para> - -<para><screen> -/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE -</screen></para> - -<para> -where IPADDR, BROADCAST, NETMASK are your IP address, broadcast address and -netmask respectively. TYPE is used to set medium type used by the device. -Typical values are "10baseT"(twisted-pair 10Mbps Ethernet) or "100baseT" -(twisted-pair 100Mbps Ethernet). For more information on how to configure -network interface, please refer to -<ulink url="http://www.tldp.org/">Networking HOWTO</ulink>. -</para> - -<para> -The link status is also shown by kernel messages. For example, after the -network interface is activated, you may have the message: -</para> - -<para><screen> -eth0: Media Link On 100mbps full-duplex -</screen></para> - -<para> -If you try to unplug the twist pair (TP) cable you will get -</para> - -<para><screen> -eth0: Media Link Off -</screen></para> - -<para> -indicating that the link is failed. -</para> -</sect1> - -<sect1> -<title>Building the driver into kernel</title> - -<para> -If you want to make the driver into kernel, choose <quote>Y</quote> -rather than <quote>M</quote> on -<quote>SiS 900/7016 PCI Fast Ethernet Adapter support</quote> -when configuring the kernel. Build the kernel image in the usual way -</para> - -<para><screen> -make clean - -make bzlilo -</screen></para> - -<para> -Next time the system reboot, you have the driver in memory. -</para> - -</sect1> -</chapter> - -<chapter id="problems"> - <title>Known Problems and Bugs</title> - -<para> -There are some known problems and bugs. If you find any other bugs please -mail to <ulink url="mailto:lcchang@sis.com.tw">lcchang@sis.com.tw</ulink> - -<orderedlist> - -<listitem> -<para> -AM79C901 HomePNA PHY is not thoroughly tested, there may be some -bugs in the <quote>on the fly</quote> change of transceiver. -</para> -</listitem> - -<listitem> -<para> -A bug is hidden somewhere in the receive buffer management code, -the bug causes NULL pointer reference in the kernel. This fault is -caught before bad things happen and reported with the message: - -<computeroutput> -eth0: NULL pointer encountered in Rx ring, skipping -</computeroutput> - -which can be viewed with <literal remap="tt">dmesg</literal> or -<literal remap="tt">cat /var/log/message</literal>. -</para> -</listitem> - -<listitem> -<para> -The media type change from 10Mbps to 100Mbps twisted-pair ethernet -by ifconfig causes the media link down. -</para> -</listitem> - -</orderedlist> -</para> -</chapter> - -<chapter id="RHistory"> - <title>Revision History</title> - -<para> -<itemizedlist> - -<listitem> -<para> -November 13, 2000, Revision 1.07, seventh release, 630E problem fixed -and further clean up. -</para> -</listitem> - -<listitem> -<para> -November 4, 1999, Revision 1.06, Second release, lots of clean up -and optimization. -</para> -</listitem> - -<listitem> -<para> -August 8, 1999, Revision 1.05, Initial Public Release -</para> -</listitem> - -</itemizedlist> -</para> -</chapter> - -<chapter id="acknowledgements"> - <title>Acknowledgements</title> - -<para> -This driver was originally derived form -<ulink url="mailto:becker@cesdis1.gsfc.nasa.gov">Donald Becker</ulink>'s -<ulink url="ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/kern-2.3/pci-skeleton.c" ->pci-skeleton</ulink> and -<ulink url="ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/kern-2.3/rtl8139.c" ->rtl8139</ulink> drivers. Donald also provided various suggestion -regarded with improvements made in revision 1.06. -</para> - -<para> -The 1.05 revision was created by -<ulink url="mailto:cmhuang@sis.com.tw">Jim Huang</ulink>, AMD 79c901 -support was added by <ulink url="mailto:lcs@sis.com.tw">Chin-Shan Li</ulink>. -</para> -</chapter> - -<chapter id="functions"> -<title>List of Functions</title> -!Idrivers/net/sis900.c -</chapter> - -</book> diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 81bc513..28a31c5e 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -151,6 +151,13 @@ Who: Ralf Baechle <ralf@linux-mips.org> --------------------------- +What: eepro100 network driver +When: January 2007 +Why: replaced by the e100 driver +Who: Adrian Bunk <bunk@stusta.de> + +--------------------------- + What: Legacy /proc/pci interface (PCI_LEGACY_PROC) When: March 2006 Why: deprecated since 2.5.53 in favor of lspci(8) diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 5b01d5c..b1181ce 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -92,8 +92,6 @@ routing.txt - the new routing mechanism shaper.txt - info on the module that can shape/limit transmitted traffic. -sis900.txt - - SiS 900/7016 Fast Ethernet device driver info. sk98lin.txt - Marvell Yukon Chipset / SysKonnect SK-98xx compliant Gigabit Ethernet Adapter family driver info diff --git a/Documentation/networking/README.ipw2100 b/Documentation/networking/README.ipw2100 index 3ab4037..f3fcaa4 100644 --- a/Documentation/networking/README.ipw2100 +++ b/Documentation/networking/README.ipw2100 @@ -3,18 +3,18 @@ Intel(R) PRO/Wireless 2100 Driver for Linux in support of: Intel(R) PRO/Wireless 2100 Network Connection -Copyright (C) 2003-2005, Intel Corporation +Copyright (C) 2003-2006, Intel Corporation README.ipw2100 -Version: 1.1.3 -Date : October 17, 2005 +Version: git-1.1.5 +Date : January 25, 2006 Index ----------------------------------------------- 0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER 1. Introduction -2. Release 1.1.3 Current Features +2. Release git-1.1.5 Current Features 3. Command Line Parameters 4. Sysfs Helper Files 5. Radio Kill Switch @@ -89,7 +89,7 @@ potential fixes and patches, as well as links to the development mailing list for the driver project. -2. Release 1.1.3 Current Supported Features +2. Release git-1.1.5 Current Supported Features ----------------------------------------------- - Managed (BSS) and Ad-Hoc (IBSS) - WEP (shared key and open) @@ -270,7 +270,7 @@ For installation support on the ipw2100 1.1.0 driver on Linux kernels 9. License ----------------------------------------------- - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License (version 2) as diff --git a/Documentation/networking/README.ipw2200 b/Documentation/networking/README.ipw2200 index c6492d3..acb30c5 100644 --- a/Documentation/networking/README.ipw2200 +++ b/Documentation/networking/README.ipw2200 @@ -10,7 +10,7 @@ both hardware adapters listed above. In this document the Intel(R) PRO/Wireless 2915ABG Driver for Linux will be used to reference the unified driver. -Copyright (C) 2004-2005, Intel Corporation +Copyright (C) 2004-2006, Intel Corporation README.ipw2200 @@ -26,9 +26,11 @@ Index 1.2. Module parameters 1.3. Wireless Extension Private Methods 1.4. Sysfs Helper Files +1.5. Supported channels 2. Ad-Hoc Networking 3. Interacting with Wireless Tools 3.1. iwconfig mode +3.2. iwconfig sens 4. About the Version Numbers 5. Firmware installation 6. Support @@ -314,6 +316,35 @@ For the device level files, see /sys/bus/pci/drivers/ipw2200: running ifconfig and is therefore disabled by default. +1.5. Supported channels +----------------------------------------------- + +Upon loading the Intel(R) PRO/Wireless 2915ABG Driver for Linux, a +message stating the detected geography code and the number of 802.11 +channels supported by the card will be displayed in the log. + +The geography code corresponds to a regulatory domain as shown in the +table below. + + Supported channels +Code Geography 802.11bg 802.11a + +--- Restricted 11 0 +ZZF Custom US/Canada 11 8 +ZZD Rest of World 13 0 +ZZA Custom USA & Europe & High 11 13 +ZZB Custom NA & Europe 11 13 +ZZC Custom Japan 11 4 +ZZM Custom 11 0 +ZZE Europe 13 19 +ZZJ Custom Japan 14 4 +ZZR Rest of World 14 0 +ZZH High Band 13 4 +ZZG Custom Europe 13 4 +ZZK Europe 13 24 +ZZL Europe 11 13 + + 2. Ad-Hoc Networking ----------------------------------------------- @@ -353,6 +384,15 @@ When configuring the mode of the adapter, all run-time configured parameters are reset to the value used when the module was loaded. This includes channels, rates, ESSID, etc. +3.2 iwconfig sens +----------------------------------------------- + +The 'iwconfig ethX sens XX' command will not set the signal sensitivity +threshold, as described in iwconfig documentation, but rather the number +of consecutive missed beacons that will trigger handover, i.e. roaming +to another access point. At the same time, it will set the disassociation +threshold to 3 times the given value. + 4. About the Version Numbers ----------------------------------------------- @@ -408,7 +448,7 @@ For general information and support, go to: 7. License ----------------------------------------------- - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as diff --git a/Documentation/networking/sis900.txt b/Documentation/networking/sis900.txt deleted file mode 100644 index bddffd7..0000000 --- a/Documentation/networking/sis900.txt +++ /dev/null @@ -1,257 +0,0 @@ - -SiS 900/7016 Fast Ethernet Device Driver - -Ollie Lho - -Lei Chun Chang - - Copyright © 1999 by Silicon Integrated System Corp. - - This document gives some information on installation and usage of SiS - 900/7016 device driver under Linux. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or (at - your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - USA - _________________________________________________________________ - - Table of Contents - 1. Introduction - 2. Changes - 3. Tested Environment - 4. Files in This Package - 5. Installation - - Building the driver as loadable module - Building the driver into kernel - - 6. Known Problems and Bugs - 7. Revision History - 8. Acknowledgements - _________________________________________________________________ - -Chapter 1. Introduction - - This document describes the revision 1.06 and 1.07 of SiS 900/7016 - Fast Ethernet device driver under Linux. The driver is developed by - Silicon Integrated System Corp. and distributed freely under the GNU - General Public License (GPL). The driver can be compiled as a loadable - module and used under Linux kernel version 2.2.x. (rev. 1.06) With - minimal changes, the driver can also be used under 2.3.x and 2.4.x - kernel (rev. 1.07), please see Chapter 5. If you are intended to use - the driver for earlier kernels, you are on your own. - - The driver is tested with usual TCP/IP applications including FTP, - Telnet, Netscape etc. and is used constantly by the developers. - - Please send all comments/fixes/questions to Lei-Chun Chang. - _________________________________________________________________ - -Chapter 2. Changes - - Changes made in Revision 1.07 - - 1. Separation of sis900.c and sis900.h in order to move most constant - definition to sis900.h (many of those constants were corrected) - 2. Clean up PCI detection, the pci-scan from Donald Becker were not - used, just simple pci_find_*. - 3. MII detection is modified to support multiple mii transceiver. - 4. Bugs in read_eeprom, mdio_* were removed. - 5. Lot of sis900 irrelevant comments were removed/changed and more - comments were added to reflect the real situation. - 6. Clean up of physical/virtual address space mess in buffer - descriptors. - 7. Better transmit/receive error handling. - 8. The driver now uses zero-copy single buffer management scheme to - improve performance. - 9. Names of variables were changed to be more consistent. - 10. Clean up of auo-negotiation and timer code. - 11. Automatic detection and change of PHY on the fly. - 12. Bug in mac probing fixed. - 13. Fix 630E equalier problem by modifying the equalizer workaround - rule. - 14. Support for ICS1893 10/100 Interated PHYceiver. - 15. Support for media select by ifconfig. - 16. Added kernel-doc extratable documentation. - _________________________________________________________________ - -Chapter 3. Tested Environment - - This driver is developed on the following hardware - - * Intel Celeron 500 with SiS 630 (rev 02) chipset - * SiS 900 (rev 01) and SiS 7016/7014 Fast Ethernet Card - - and tested with these software environments - - * Red Hat Linux version 6.2 - * Linux kernel version 2.4.0 - * Netscape version 4.6 - * NcFTP 3.0.0 beta 18 - * Samba version 2.0.3 - _________________________________________________________________ - -Chapter 4. Files in This Package - - In the package you can find these files: - - sis900.c - Driver source file in C - - sis900.h - Header file for sis900.c - - sis900.sgml - DocBook SGML source of the document - - sis900.txt - Driver document in plain text - _________________________________________________________________ - -Chapter 5. Installation - - Silicon Integrated System Corp. is cooperating closely with core Linux - Kernel developers. The revisions of SiS 900 driver are distributed by - the usuall channels for kernel tar files and patches. Those kernel tar - files for official kernel and patches for kernel pre-release can be - download at official kernel ftp site and its mirrors. The 1.06 - revision can be found in kernel version later than 2.3.15 and - pre-2.2.14, and 1.07 revision can be found in kernel version 2.4.0. If - you have no prior experience in networking under Linux, please read - Ethernet HOWTO and Networking HOWTO available from Linux Documentation - Project (LDP). - - The driver is bundled in release later than 2.2.11 and 2.3.15 so this - is the most easy case. Be sure you have the appropriate packages for - compiling kernel source. Those packages are listed in Document/Changes - in kernel source distribution. If you have to install the driver other - than those bundled in kernel release, you should have your driver file - sis900.c and sis900.h copied into /usr/src/linux/drivers/net/ first. - There are two alternative ways to install the driver - _________________________________________________________________ - -Building the driver as loadable module - - To build the driver as a loadable kernel module you have to - reconfigure the kernel to activate network support by - -make menuconfig - - Choose "Loadable module support --->", then select "Enable loadable - module support". - - Choose "Network Device Support --->", select "Ethernet (10 or - 100Mbit)". Then select "EISA, VLB, PCI and on board controllers", and - choose "SiS 900/7016 PCI Fast Ethernet Adapter support" to "M". - - After reconfiguring the kernel, you can make the driver module by - -make modules - - The driver should be compiled with no errors. After compiling the - driver, the driver can be installed to proper place by - -make modules_install - - Load the driver into kernel by - -insmod sis900 - - When loading the driver into memory, some information message can be - view by - -dmesg - - or -cat /var/log/message - - If the driver is loaded properly you will have messages similar to - this: - -sis900.c: v1.07.06 11/07/2000 -eth0: SiS 900 PCI Fast Ethernet at 0xd000, IRQ 10, 00:00:e8:83:7f:a4. -eth0: SiS 900 Internal MII PHY transceiver found at address 1. -eth0: Using SiS 900 Internal MII PHY as default - - showing the version of the driver and the results of probing routine. - - Once the driver is loaded, network can be brought up by - -/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE - - where IPADDR, BROADCAST, NETMASK are your IP address, broadcast - address and netmask respectively. TYPE is used to set medium type used - by the device. Typical values are "10baseT"(twisted-pair 10Mbps - Ethernet) or "100baseT" (twisted-pair 100Mbps Ethernet). For more - information on how to configure network interface, please refer to - Networking HOWTO. - - The link status is also shown by kernel messages. For example, after - the network interface is activated, you may have the message: - -eth0: Media Link On 100mbps full-duplex - - If you try to unplug the twist pair (TP) cable you will get - -eth0: Media Link Off - - indicating that the link is failed. - _________________________________________________________________ - -Building the driver into kernel - - If you want to make the driver into kernel, choose "Y" rather than "M" - on "SiS 900/7016 PCI Fast Ethernet Adapter support" when configuring - the kernel. Build the kernel image in the usual way - -make clean - -make bzlilo - - Next time the system reboot, you have the driver in memory. - _________________________________________________________________ - -Chapter 6. Known Problems and Bugs - - There are some known problems and bugs. If you find any other bugs - please mail to lcchang@sis.com.tw - - 1. AM79C901 HomePNA PHY is not thoroughly tested, there may be some - bugs in the "on the fly" change of transceiver. - 2. A bug is hidden somewhere in the receive buffer management code, - the bug causes NULL pointer reference in the kernel. This fault is - caught before bad things happen and reported with the message: - eth0: NULL pointer encountered in Rx ring, skipping which can be - viewed with dmesg or cat /var/log/message. - 3. The media type change from 10Mbps to 100Mbps twisted-pair ethernet - by ifconfig causes the media link down. - _________________________________________________________________ - -Chapter 7. Revision History - - * November 13, 2000, Revision 1.07, seventh release, 630E problem - fixed and further clean up. - * November 4, 1999, Revision 1.06, Second release, lots of clean up - and optimization. - * August 8, 1999, Revision 1.05, Initial Public Release - _________________________________________________________________ - -Chapter 8. Acknowledgements - - This driver was originally derived form Donald Becker's pci-skeleton - and rtl8139 drivers. Donald also provided various suggestion regarded - with improvements made in revision 1.06. - - The 1.05 revision was created by Jim Huang, AMD 79c901 support was - added by Chin-Shan Li. diff --git a/arch/ppc/platforms/hdpu.c b/arch/ppc/platforms/hdpu.c index 50039a2..f945416 100644 --- a/arch/ppc/platforms/hdpu.c +++ b/arch/ppc/platforms/hdpu.c @@ -319,11 +319,10 @@ static void __init hdpu_fixup_eth_pdata(struct platform_device *pd) struct mv643xx_eth_platform_data *eth_pd; eth_pd = pd->dev.platform_data; - eth_pd->port_serial_control = - mv64x60_read(&bh, MV643XX_ETH_PORT_SERIAL_CONTROL_REG(pd->id) & ~1); - eth_pd->force_phy_addr = 1; eth_pd->phy_addr = pd->id; + eth_pd->speed = SPEED_100; + eth_pd->duplex = DUPLEX_FULL; eth_pd->tx_queue_size = 400; eth_pd->rx_queue_size = 800; } diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index d39c9f2..460f72e 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -217,7 +217,7 @@ static void _sparc_free_io(struct resource *res) unsigned long plen; plen = res->end - res->start + 1; - if ((plen & (PAGE_SIZE-1)) != 0) BUG(); + BUG_ON((plen & (PAGE_SIZE-1)) != 0); sparc_unmapiorange(res->start, plen); release_resource(res); } @@ -512,8 +512,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba) dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ return virt_to_phys(ptr); } @@ -528,8 +527,7 @@ dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); @@ -542,8 +540,7 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, unsigned long offset, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ return page_to_phys(page) + offset; } @@ -551,8 +548,7 @@ dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* mmu_inval_dma_area XXX */ } @@ -576,11 +572,10 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); sg->dvma_address = virt_to_phys(page_address(sg->page)); sg->dvma_length = sg->length; sg++; @@ -597,11 +592,10 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); @@ -622,8 +616,7 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, */ void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); @@ -632,8 +625,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t si void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); @@ -650,11 +642,10 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); @@ -667,11 +658,10 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, i { int n; - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { for (n = 0; n < nents; n++) { - if (page_address(sg->page) == NULL) BUG(); + BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 4c0a50a..c3685b3 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -186,6 +186,15 @@ endchoice endmenu +config ARCH_SPARSEMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + +config LARGE_ALLOCS + def_bool y + source "mm/Kconfig" config GENERIC_ISA_DMA @@ -350,6 +359,15 @@ config SOLARIS_EMUL endmenu +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default y + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with UltraSPARC cpus at a cost of slightly increased + overhead in some places. If unsure say N here. + config CMDLINE_BOOL bool "Default bootloader kernel arguments" diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 069d497..f819a96 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc2 -# Tue Feb 7 17:47:18 2006 +# Linux kernel version: 2.6.16 +# Mon Mar 20 01:23:21 2006 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -115,14 +115,20 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_HUGETLB_PAGE_SIZE_4MB=y # CONFIG_HUGETLB_PAGE_SIZE_512K is not set # CONFIG_HUGETLB_PAGE_SIZE_64K is not set +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_LARGE_ALLOCS=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_MEMORY_HOTPLUG=y CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y CONFIG_GENERIC_ISA_DMA=y CONFIG_SBUS=y CONFIG_SBUSCHAR=y @@ -655,6 +661,7 @@ CONFIG_SERIAL_SUNCORE=y CONFIG_SERIAL_SUNSU=y CONFIG_SERIAL_SUNSU_CONSOLE=y CONFIG_SERIAL_SUNSAB=m +CONFIG_SERIAL_SUNHV=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set @@ -1116,11 +1123,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_INFINIBAND is not set # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index 83d67eb..6f68164 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -11,10 +11,12 @@ obj-y := process.o setup.o cpu.o idprom.o \ traps.o devices.o auxio.o una_asm.o \ irq.o ptrace.o time.o sys_sparc.o signal.o \ unaligned.o central.o pci.o starfire.o semaphore.o \ - power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o + power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \ + visemul.o obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ - pci_psycho.o pci_sabre.o pci_schizo.o + pci_psycho.o pci_sabre.o pci_schizo.o \ + pci_sun4v.o pci_sun4v_asm.o obj-$(CONFIG_SMP) += smp.o trampoline.o obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o @@ -38,5 +40,5 @@ else CMODEL_CFLAG := -m64 -mcmodel=medlow endif -head.o: head.S ttable.S itlb_base.S dtlb_base.S dtlb_backend.S dtlb_prot.S \ +head.o: head.S ttable.S itlb_miss.S dtlb_miss.S ktlb.S tsb.S \ etrap.S rtrap.S winfixup.S entry.S diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index 202a80c..d7caa60 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -31,6 +31,7 @@ #include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> +#include <asm/mmu_context.h> static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout32_library(struct file*); @@ -238,6 +239,8 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs) (current->mm->start_data = N_DATADDR(ex)); current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); + current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; current->mm->mmap = NULL; compute_creds(bprm); @@ -329,15 +332,8 @@ beyond_if: current->mm->start_stack = (unsigned long) create_aout32_tables((char __user *)bprm->p, bprm); - if (!(orig_thr_flags & _TIF_32BIT)) { - unsigned long pgd_cache = get_pgd_cache(current->mm->pgd); - - __asm__ __volatile__("stxa\t%0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (pgd_cache), - "r" (TSB_REG), "i" (ASI_DMMU)); - } + tsb_context_switch(current->mm); + start_thread32(regs, ex.a_entry, current->mm->start_stack); if (current->ptrace & PT_PTRACED) send_sig(SIGTRAP, current, 0); diff --git a/arch/sparc64/kernel/binfmt_elf32.c b/arch/sparc64/kernel/binfmt_elf32.c index a1a12d2..8a2abcc 100644 --- a/arch/sparc64/kernel/binfmt_elf32.c +++ b/arch/sparc64/kernel/binfmt_elf32.c @@ -153,7 +153,9 @@ MODULE_AUTHOR("Eric Youngdale, David S. Miller, Jakub Jelinek"); #undef MODULE_DESCRIPTION #undef MODULE_AUTHOR +#include <asm/a.out.h> + #undef TASK_SIZE -#define TASK_SIZE 0xf0000000 +#define TASK_SIZE STACK_TOP32 #include "../../../fs/binfmt_elf.c" diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c index 00eed88..11cc0ca 100644 --- a/arch/sparc64/kernel/cpu.c +++ b/arch/sparc64/kernel/cpu.c @@ -13,6 +13,7 @@ #include <asm/system.h> #include <asm/fpumacro.h> #include <asm/cpudata.h> +#include <asm/spitfire.h> DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; @@ -71,6 +72,12 @@ void __init cpu_probe(void) unsigned long ver, fpu_vers, manuf, impl, fprs; int i; + if (tlb_type == hypervisor) { + sparc_cpu_type = "UltraSparc T1 (Niagara)"; + sparc_fpu_type = "UltraSparc T1 integrated FPU"; + return; + } + fprs = fprs_read(); fprs_write(FPRS_FEF); __asm__ __volatile__ ("rdpr %%ver, %0; stx %%fsr, [%1]" diff --git a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c index df9a1ca..007e892 100644 --- a/arch/sparc64/kernel/devices.c +++ b/arch/sparc64/kernel/devices.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/spinlock.h> #include <linux/errno.h> +#include <linux/bootmem.h> #include <asm/page.h> #include <asm/oplib.h> @@ -20,6 +21,8 @@ #include <asm/spitfire.h> #include <asm/timer.h> #include <asm/cpudata.h> +#include <asm/vdev.h> +#include <asm/irq.h> /* Used to synchronize acceses to NatSemi SUPER I/O chip configure * operations in asm/ns87303.h @@ -29,13 +32,158 @@ DEFINE_SPINLOCK(ns87303_lock); extern void cpu_probe(void); extern void central_probe(void); -static char *cpu_mid_prop(void) +u32 sun4v_vdev_devhandle; +int sun4v_vdev_root; + +struct vdev_intmap { + unsigned int phys; + unsigned int irq; + unsigned int cnode; + unsigned int cinterrupt; +}; + +struct vdev_intmask { + unsigned int phys; + unsigned int interrupt; + unsigned int __unused; +}; + +static struct vdev_intmap *vdev_intmap; +static int vdev_num_intmap; +static struct vdev_intmask vdev_intmask; + +static void __init sun4v_virtual_device_probe(void) +{ + struct linux_prom64_registers regs; + struct vdev_intmap *ip; + int node, sz, err; + + if (tlb_type != hypervisor) + return; + + node = prom_getchild(prom_root_node); + node = prom_searchsiblings(node, "virtual-devices"); + if (!node) { + prom_printf("SUN4V: Fatal error, no virtual-devices node.\n"); + prom_halt(); + } + + sun4v_vdev_root = node; + + prom_getproperty(node, "reg", (char *)®s, sizeof(regs)); + sun4v_vdev_devhandle = (regs.phys_addr >> 32UL) & 0x0fffffff; + + sz = prom_getproplen(node, "interrupt-map"); + if (sz <= 0) { + prom_printf("SUN4V: Error, no vdev interrupt-map.\n"); + prom_halt(); + } + + if ((sz % sizeof(*ip)) != 0) { + prom_printf("SUN4V: Bogus interrupt-map property size %d\n", + sz); + prom_halt(); + } + + vdev_intmap = ip = alloc_bootmem_low_pages(sz); + if (!vdev_intmap) { + prom_printf("SUN4V: Error, cannot allocate vdev_intmap.\n"); + prom_halt(); + } + + err = prom_getproperty(node, "interrupt-map", (char *) ip, sz); + if (err == -1) { + prom_printf("SUN4V: Fatal error, no vdev interrupt-map.\n"); + prom_halt(); + } + if (err != sz) { + prom_printf("SUN4V: Inconsistent interrupt-map size, " + "proplen(%d) vs getprop(%d).\n", sz,err); + prom_halt(); + } + + vdev_num_intmap = err / sizeof(*ip); + + err = prom_getproperty(node, "interrupt-map-mask", + (char *) &vdev_intmask, + sizeof(vdev_intmask)); + if (err <= 0) { + prom_printf("SUN4V: Fatal error, no vdev " + "interrupt-map-mask.\n"); + prom_halt(); + } + if (err % sizeof(vdev_intmask)) { + prom_printf("SUN4V: Bogus interrupt-map-mask " + "property size %d\n", err); + prom_halt(); + } + + printk("SUN4V: virtual-devices devhandle[%x]\n", + sun4v_vdev_devhandle); +} + +unsigned int sun4v_vdev_device_interrupt(unsigned int dev_node) +{ + unsigned int irq, reg; + int err, i; + + err = prom_getproperty(dev_node, "interrupts", + (char *) &irq, sizeof(irq)); + if (err <= 0) { + printk("VDEV: Cannot get \"interrupts\" " + "property for OBP node %x\n", dev_node); + return 0; + } + + err = prom_getproperty(dev_node, "reg", + (char *) ®, sizeof(reg)); + if (err <= 0) { + printk("VDEV: Cannot get \"reg\" " + "property for OBP node %x\n", dev_node); + return 0; + } + + for (i = 0; i < vdev_num_intmap; i++) { + if (vdev_intmap[i].phys == (reg & vdev_intmask.phys) && + vdev_intmap[i].irq == (irq & vdev_intmask.interrupt)) { + irq = vdev_intmap[i].cinterrupt; + break; + } + } + + if (i == vdev_num_intmap) { + printk("VDEV: No matching interrupt map entry " + "for OBP node %x\n", dev_node); + return 0; + } + + return sun4v_build_irq(sun4v_vdev_devhandle, irq, 5, 0); +} + +static const char *cpu_mid_prop(void) { if (tlb_type == spitfire) return "upa-portid"; return "portid"; } +static int get_cpu_mid(int prom_node) +{ + if (tlb_type == hypervisor) { + struct linux_prom64_registers reg; + + if (prom_getproplen(prom_node, "cpuid") == 4) + return prom_getintdefault(prom_node, "cpuid", 0); + + prom_getproperty(prom_node, "reg", (char *) ®, sizeof(reg)); + return (reg.phys_addr >> 32) & 0x0fffffffUL; + } else { + const char *prop_name = cpu_mid_prop(); + + return prom_getintdefault(prom_node, prop_name, 0); + } +} + static int check_cpu_node(int nd, int *cur_inst, int (*compare)(int, int, void *), void *compare_arg, int *prom_node, int *mid) @@ -50,7 +198,7 @@ static int check_cpu_node(int nd, int *cur_inst, if (prom_node) *prom_node = nd; if (mid) - *mid = prom_getintdefault(nd, cpu_mid_prop(), 0); + *mid = get_cpu_mid(nd); return 0; } @@ -105,7 +253,7 @@ static int cpu_mid_compare(int nd, int instance, void *_arg) int desired_mid = (int) (long) _arg; int this_mid; - this_mid = prom_getintdefault(nd, cpu_mid_prop(), 0); + this_mid = get_cpu_mid(nd); if (this_mid == desired_mid) return 0; return -ENODEV; @@ -126,7 +274,8 @@ void __init device_scan(void) #ifndef CONFIG_SMP { - int err, cpu_node; + int err, cpu_node, def; + err = cpu_find_by_instance(0, &cpu_node, NULL); if (err) { prom_printf("No cpu nodes, cannot continue\n"); @@ -135,21 +284,40 @@ void __init device_scan(void) cpu_data(0).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); + + def = ((tlb_type == hypervisor) ? + (8 * 1024) : + (16 * 1024)); cpu_data(0).dcache_size = prom_getintdefault(cpu_node, "dcache-size", - 16 * 1024); + def); + + def = 32; cpu_data(0).dcache_line_size = - prom_getintdefault(cpu_node, "dcache-line-size", 32); + prom_getintdefault(cpu_node, "dcache-line-size", + def); + + def = 16 * 1024; cpu_data(0).icache_size = prom_getintdefault(cpu_node, "icache-size", - 16 * 1024); + def); + + def = 32; cpu_data(0).icache_line_size = - prom_getintdefault(cpu_node, "icache-line-size", 32); + prom_getintdefault(cpu_node, "icache-line-size", + def); + + def = ((tlb_type == hypervisor) ? + (3 * 1024 * 1024) : + (4 * 1024 * 1024)); cpu_data(0).ecache_size = prom_getintdefault(cpu_node, "ecache-size", - 4 * 1024 * 1024); + def); + + def = 64; cpu_data(0).ecache_line_size = - prom_getintdefault(cpu_node, "ecache-line-size", 64); + prom_getintdefault(cpu_node, "ecache-line-size", + def); printk("CPU[0]: Caches " "D[sz(%d):line_sz(%d)] " "I[sz(%d):line_sz(%d)] " @@ -160,6 +328,7 @@ void __init device_scan(void) } #endif + sun4v_virtual_device_probe(); central_probe(); cpu_probe(); diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S deleted file mode 100644 index acc889a..0000000 --- a/arch/sparc64/kernel/dtlb_backend.S +++ /dev/null @@ -1,170 +0,0 @@ -/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $ - * dtlb_backend.S: Back end to DTLB miss replacement strategy. - * This is included directly into the trap table. - * - * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include <asm/pgtable.h> -#include <asm/mmu.h> - -#define VALID_SZ_BITS (_PAGE_VALID | _PAGE_SZBITS) - -#define VPTE_BITS (_PAGE_CP | _PAGE_CV | _PAGE_P ) -#define VPTE_SHIFT (PAGE_SHIFT - 3) - -/* Ways we can get here: - * - * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1. - * 2) Nucleus loads and stores to/from user/kernel window save areas. - * 3) VPTE misses from dtlb_base and itlb_base. - * - * We need to extract out the PMD and PGDIR indexes from the - * linear virtual page table access address. The PTE index - * is at the bottom, but we are not concerned with it. Bits - * 0 to 2 are clear since each PTE is 8 bytes in size. Each - * PMD and PGDIR entry are 4 bytes in size. Thus, this - * address looks something like: - * - * |---------------------------------------------------------------| - * | ... | PGDIR index | PMD index | PTE index | | - * |---------------------------------------------------------------| - * 63 F E D C B A 3 2 0 <- bit nr - * - * The variable bits above are defined as: - * A --> 3 + (PAGE_SHIFT - log2(8)) - * --> 3 + (PAGE_SHIFT - 3) - 1 - * (ie. this is "bit 3" + PAGE_SIZE - size of PTE entry in bits - 1) - * B --> A + 1 - * C --> B + (PAGE_SHIFT - log2(4)) - * --> B + (PAGE_SHIFT - 2) - 1 - * (ie. this is "bit B" + PAGE_SIZE - size of PMD entry in bits - 1) - * D --> C + 1 - * E --> D + (PAGE_SHIFT - log2(4)) - * --> D + (PAGE_SHIFT - 2) - 1 - * (ie. this is "bit D" + PAGE_SIZE - size of PGDIR entry in bits - 1) - * F --> E + 1 - * - * (Note how "B" always evalutes to PAGE_SHIFT, all the other constants - * cancel out.) - * - * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are: - * A --> 12 - * B --> 13 - * C --> 23 - * D --> 24 - * E --> 34 - * F --> 35 - * - * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are: - * A --> 15 - * B --> 16 - * C --> 29 - * D --> 30 - * E --> 43 - * F --> 44 - * - * Because bits both above and below each PGDIR and PMD index need to - * be masked out, and the index can be as long as 14 bits (when using a - * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions - * to extract each index out. - * - * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so - * we try to avoid using them for the entire operation. We could setup - * a mask anywhere from bit 31 down to bit 10 using the sethi instruction. - * - * We need a mask covering bits B --> C and one covering D --> E. - * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000. - * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000. - * The second in each set cannot be loaded with a single sethi - * instruction, because the upper bits are past bit 32. We would - * need to use a sethi + a shift. - * - * For the time being, we use 2 shifts and a simple "and" mask. - * We shift left to clear the bits above the index, we shift down - * to clear the bits below the index (sans the log2(4 or 8) bits) - * and a mask to clear the log2(4 or 8) bits. We need therefore - * define 4 shift counts, all of which are relative to PAGE_SHIFT. - * - * Although unsupportable for other reasons, this does mean that - * 512K and 4MB page sizes would be generaally supported by the - * kernel. (ELF binaries would break with > 64K PAGE_SIZE since - * the sections are only aligned that strongly). - * - * The operations performed for extraction are thus: - * - * ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3 - * - */ - -#define A (3 + (PAGE_SHIFT - 3) - 1) -#define B (A + 1) -#define C (B + (PAGE_SHIFT - 2) - 1) -#define D (C + 1) -#define E (D + (PAGE_SHIFT - 2) - 1) -#define F (E + 1) - -#define PMD_SHIFT_LEFT (64 - D) -#define PMD_SHIFT_RIGHT (64 - (D - B) - 2) -#define PGDIR_SHIFT_LEFT (64 - F) -#define PGDIR_SHIFT_RIGHT (64 - (F - D) - 2) -#define LOW_MASK_BITS 0x3 - -/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss */ - ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS - add %g3, %g3, %g5 ! Compute VPTE base - cmp %g4, %g5 ! VPTE miss? - bgeu,pt %xcc, 1f ! Continue here - andcc %g4, TAG_CONTEXT_BITS, %g5 ! tl0 miss Nucleus test - ba,a,pt %xcc, from_tl1_trap ! Fall to tl0 miss -1: sllx %g6, VPTE_SHIFT, %g4 ! Position TAG_ACCESS - or %g4, %g5, %g4 ! Prepare TAG_ACCESS - -/* TLB1 ** ICACHE line 2: Quick VPTE miss */ - mov TSB_REG, %g1 ! Grab TSB reg - ldxa [%g1] ASI_DMMU, %g5 ! Doing PGD caching? - sllx %g6, PMD_SHIFT_LEFT, %g1 ! Position PMD offset - be,pn %xcc, sparc64_vpte_nucleus ! Is it from Nucleus? - srlx %g1, PMD_SHIFT_RIGHT, %g1 ! Mask PMD offset bits - brnz,pt %g5, sparc64_vpte_continue ! Yep, go like smoke - andn %g1, LOW_MASK_BITS, %g1 ! Final PMD mask - sllx %g6, PGDIR_SHIFT_LEFT, %g5 ! Position PGD offset - -/* TLB1 ** ICACHE line 3: Quick VPTE miss */ - srlx %g5, PGDIR_SHIFT_RIGHT, %g5 ! Mask PGD offset bits - andn %g5, LOW_MASK_BITS, %g5 ! Final PGD mask - lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD - brz,pn %g5, vpte_noent ! Valid? -sparc64_kpte_continue: - sllx %g5, 11, %g5 ! Shift into place -sparc64_vpte_continue: - lduwa [%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD - sllx %g5, 11, %g5 ! Shift into place - brz,pn %g5, vpte_noent ! Valid? - -/* TLB1 ** ICACHE line 4: Quick VPTE miss */ - mov (VALID_SZ_BITS >> 61), %g1 ! upper vpte into %g1 - sllx %g1, 61, %g1 ! finish calc - or %g5, VPTE_BITS, %g5 ! Prepare VPTE data - or %g5, %g1, %g5 ! ... - mov TLB_SFSR, %g1 ! Restore %g1 value - stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB - stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS - retry ! Load PTE once again - -#undef VALID_SZ_BITS -#undef VPTE_SHIFT -#undef VPTE_BITS -#undef A -#undef B -#undef C -#undef D -#undef E -#undef F -#undef PMD_SHIFT_LEFT -#undef PMD_SHIFT_RIGHT -#undef PGDIR_SHIFT_LEFT -#undef PGDIR_SHIFT_RIGHT -#undef LOW_MASK_BITS - diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S deleted file mode 100644 index 6528786..0000000 --- a/arch/sparc64/kernel/dtlb_base.S +++ /dev/null @@ -1,109 +0,0 @@ -/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $ - * dtlb_base.S: Front end to DTLB miss replacement strategy. - * This is included directly into the trap table. - * - * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include <asm/pgtable.h> -#include <asm/mmu.h> - -/* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS) - * %g2 (KERN_HIGHBITS | KERN_LOWBITS) - * %g3 VPTE base (0xfffffffe00000000) Spitfire/Blackbird (44-bit VA space) - * (0xffe0000000000000) Cheetah (64-bit VA space) - * %g7 __pa(current->mm->pgd) - * - * The VPTE base value is completely magic, but note that - * few places in the kernel other than these TLB miss - * handlers know anything about the VPTE mechanism or - * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD). - * Consider the 44-bit VADDR Ultra-I/II case as an example: - * - * VA[0 : (1<<43)] produce VPTE index [%g3 : 0] - * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3] - * - * For Cheetah's 64-bit VADDR space this is: - * - * VA[0 : (1<<63)] produce VPTE index [%g3 : 0] - * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3] - * - * If you're paying attention you'll notice that this means half of - * the VPTE table is above %g3 and half is below, low VA addresses - * map progressively upwards from %g3, and high VA addresses map - * progressively upwards towards %g3. This trick was needed to make - * the same 8 instruction handler work both for Spitfire/Blackbird's - * peculiar VA space hole configuration and the full 64-bit VA space - * one of Cheetah at the same time. - */ - -/* Ways we can get here: - * - * 1) Nucleus loads and stores to/from PA-->VA direct mappings. - * 2) Nucleus loads and stores to/from vmalloc() areas. - * 3) User loads and stores. - * 4) User space accesses by nucleus at tl0 - */ - -#if PAGE_SHIFT == 13 -/* - * To compute vpte offset, we need to do ((addr >> 13) << 3), - * which can be optimized to (addr >> 10) if bits 10/11/12 can - * be guaranteed to be 0 ... mmu_context.h does guarantee this - * by only using 10 bits in the hwcontext value. - */ -#define CREATE_VPTE_OFFSET1(r1, r2) nop -#define CREATE_VPTE_OFFSET2(r1, r2) \ - srax r1, 10, r2 -#else -#define CREATE_VPTE_OFFSET1(r1, r2) \ - srax r1, PAGE_SHIFT, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) \ - sllx r2, 3, r2 -#endif - -/* DTLB ** ICACHE line 1: Quick user TLB misses */ - mov TLB_SFSR, %g1 - ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS - andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? -from_tl1_trap: - rdpr %tl, %g5 ! For TL==3 test - CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset - be,pn %xcc, kvmap ! Yep, special processing - CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset - cmp %g5, 4 ! Last trap level? - -/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */ - be,pn %xcc, longpath ! Yep, cannot risk VPTE miss - nop ! delay slot - ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE -1: brgez,pn %g5, longpath ! Invalid, branch out - nop ! Delay-slot -9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB - retry ! Trap return - nop - -/* DTLB ** ICACHE line 3: winfixups+real_faults */ -longpath: - rdpr %pstate, %g5 ! Move into alternate globals - wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tl, %g4 ! See where we came from. - cmp %g4, 1 ! Is etrap/rtrap window fault? - mov TLB_TAG_ACCESS, %g4 ! Prepare for fault processing - ldxa [%g4] ASI_DMMU, %g5 ! Load faulting VA page - be,pt %xcc, sparc64_realfault_common ! Jump to normal fault handling - mov FAULT_CODE_DTLB, %g4 ! It was read from DTLB - -/* DTLB ** ICACHE line 4: Unused... */ - ba,a,pt %xcc, winfix_trampoline ! Call window fixup code - nop - nop - nop - nop - nop - nop - nop - -#undef CREATE_VPTE_OFFSET1 -#undef CREATE_VPTE_OFFSET2 diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S new file mode 100644 index 0000000..09a6a15 --- /dev/null +++ b/arch/sparc64/kernel/dtlb_miss.S @@ -0,0 +1,39 @@ +/* DTLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context + brz,pn %g5, kvmap_dtlb ! Context 0 processing + srlx %g6, 22, %g6 ! Delay slot + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + +/* DTLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_dtlb ! Miss + mov FAULT_CODE_DTLB, %g3 + stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB + retry ! Trap done + nop + nop + nop + nop + +/* DTLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* DTLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c index 7991e91..c69504a 100644 --- a/arch/sparc64/kernel/ebus.c +++ b/arch/sparc64/kernel/ebus.c @@ -277,10 +277,9 @@ static inline void *ebus_alloc(size_t size) { void *mem; - mem = kmalloc(size, GFP_ATOMIC); + mem = kzalloc(size, GFP_ATOMIC); if (!mem) panic("ebus_alloc: out of memory"); - memset((char *)mem, 0, size); return mem; } diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a73553a..6d0b3ed 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -50,7 +50,8 @@ do_fpdis: add %g0, %g0, %g0 ba,a,pt %xcc, rtrap_clr_l6 -1: ldub [%g6 + TI_FPSAVED], %g5 +1: TRAP_LOAD_THREAD_REG(%g6, %g1) + ldub [%g6 + TI_FPSAVED], %g5 wr %g0, FPRS_FEF, %fprs andcc %g5, FPRS_FEF, %g0 be,a,pt %icc, 1f @@ -96,10 +97,22 @@ do_fpdis: add %g6, TI_FPREGS + 0x80, %g1 faddd %f0, %f2, %f4 fmuld %f0, %f2, %f6 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync add %g6, TI_FPREGS + 0xc0, %g2 faddd %f0, %f2, %f8 @@ -125,11 +138,23 @@ do_fpdis: fzero %f32 mov SECONDARY_CONTEXT, %g3 fzero %f34 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + add %g6, TI_FPREGS, %g1 sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync add %g6, TI_FPREGS + 0x40, %g2 faddd %f32, %f34, %f36 @@ -154,10 +179,22 @@ do_fpdis: nop 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync mov 0x40, %g2 membar #Sync @@ -168,7 +205,13 @@ do_fpdis: ldda [%g1 + %g2] ASI_BLK_S, %f48 membar #Sync fpdis_exit: - stxa %g5, [%g3] ASI_DMMU + +661: stxa %g5, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g5, [%g3] ASI_MMU + .previous + membar #Sync fpdis_exit2: wr %g7, 0, %gsr @@ -189,6 +232,7 @@ fp_other_bounce: .globl do_fpother_check_fitos .align 32 do_fpother_check_fitos: + TRAP_LOAD_THREAD_REG(%g6, %g1) sethi %hi(fp_other_bounce - 4), %g7 or %g7, %lo(fp_other_bounce - 4), %g7 @@ -312,6 +356,7 @@ fitos_emul_fini: .globl do_fptrap .align 32 do_fptrap: + TRAP_LOAD_THREAD_REG(%g6, %g1) stx %fsr, [%g6 + TI_XFSR] do_fptrap_after_fsr: ldub [%g6 + TI_FPSAVED], %g3 @@ -321,10 +366,22 @@ do_fptrap_after_fsr: rd %gsr, %g3 stx %g3, [%g6 + TI_GSR] mov SECONDARY_CONTEXT, %g3 - ldxa [%g3] ASI_DMMU, %g5 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + sethi %hi(sparc64_kern_sec_context), %g2 ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 - stxa %g2, [%g3] ASI_DMMU + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + membar #Sync add %g6, TI_FPREGS, %g2 andcc %g1, FPRS_DL, %g0 @@ -339,7 +396,13 @@ do_fptrap_after_fsr: stda %f48, [%g2 + %g3] ASI_BLK_S 5: mov SECONDARY_CONTEXT, %g1 membar #Sync - stxa %g5, [%g1] ASI_DMMU + +661: stxa %g5, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g5, [%g1] ASI_MMU + .previous + membar #Sync ba,pt %xcc, etrap wr %g0, 0, %fprs @@ -353,8 +416,6 @@ do_fptrap_after_fsr: * * With this method we can do most of the cross-call tlb/cache * flushing very quickly. - * - * Current CPU's IRQ worklist table is locked into %g6, don't touch. */ .text .align 32 @@ -378,6 +439,8 @@ do_ivec: sllx %g2, %g4, %g2 sllx %g4, 2, %g4 + TRAP_LOAD_IRQ_WORK(%g6, %g1) + lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */ @@ -399,76 +462,6 @@ do_ivec_xcall: 1: jmpl %g3, %g0 nop - .globl save_alternate_globals -save_alternate_globals: /* %o0 = save_area */ - rdpr %pstate, %o5 - andn %o5, PSTATE_IE, %o1 - wrpr %o1, PSTATE_AG, %pstate - stx %g0, [%o0 + 0x00] - stx %g1, [%o0 + 0x08] - stx %g2, [%o0 + 0x10] - stx %g3, [%o0 + 0x18] - stx %g4, [%o0 + 0x20] - stx %g5, [%o0 + 0x28] - stx %g6, [%o0 + 0x30] - stx %g7, [%o0 + 0x38] - wrpr %o1, PSTATE_IG, %pstate - stx %g0, [%o0 + 0x40] - stx %g1, [%o0 + 0x48] - stx %g2, [%o0 + 0x50] - stx %g3, [%o0 + 0x58] - stx %g4, [%o0 + 0x60] - stx %g5, [%o0 + 0x68] - stx %g6, [%o0 + 0x70] - stx %g7, [%o0 + 0x78] - wrpr %o1, PSTATE_MG, %pstate - stx %g0, [%o0 + 0x80] - stx %g1, [%o0 + 0x88] - stx %g2, [%o0 + 0x90] - stx %g3, [%o0 + 0x98] - stx %g4, [%o0 + 0xa0] - stx %g5, [%o0 + 0xa8] - stx %g6, [%o0 + 0xb0] - stx %g7, [%o0 + 0xb8] - wrpr %o5, 0x0, %pstate - retl - nop - - .globl restore_alternate_globals -restore_alternate_globals: /* %o0 = save_area */ - rdpr %pstate, %o5 - andn %o5, PSTATE_IE, %o1 - wrpr %o1, PSTATE_AG, %pstate - ldx [%o0 + 0x00], %g0 - ldx [%o0 + 0x08], %g1 - ldx [%o0 + 0x10], %g2 - ldx [%o0 + 0x18], %g3 - ldx [%o0 + 0x20], %g4 - ldx [%o0 + 0x28], %g5 - ldx [%o0 + 0x30], %g6 - ldx [%o0 + 0x38], %g7 - wrpr %o1, PSTATE_IG, %pstate - ldx [%o0 + 0x40], %g0 - ldx [%o0 + 0x48], %g1 - ldx [%o0 + 0x50], %g2 - ldx [%o0 + 0x58], %g3 - ldx [%o0 + 0x60], %g4 - ldx [%o0 + 0x68], %g5 - ldx [%o0 + 0x70], %g6 - ldx [%o0 + 0x78], %g7 - wrpr %o1, PSTATE_MG, %pstate - ldx [%o0 + 0x80], %g0 - ldx [%o0 + 0x88], %g1 - ldx [%o0 + 0x90], %g2 - ldx [%o0 + 0x98], %g3 - ldx [%o0 + 0xa0], %g4 - ldx [%o0 + 0xa8], %g5 - ldx [%o0 + 0xb0], %g6 - ldx [%o0 + 0xb8], %g7 - wrpr %o5, 0x0, %pstate - retl - nop - .globl getcc, setcc getcc: ldx [%o0 + PT_V9_TSTATE], %o1 @@ -488,9 +481,24 @@ setcc: retl stx %o1, [%o0 + PT_V9_TSTATE] - .globl utrap, utrap_ill -utrap: brz,pn %g1, etrap + .globl utrap_trap +utrap_trap: /* %g3=handler,%g4=level */ + TRAP_LOAD_THREAD_REG(%g6, %g1) + ldx [%g6 + TI_UTRAPS], %g1 + brnz,pt %g1, invoke_utrap nop + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + call bad_trap + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 + +invoke_utrap: + sllx %g3, 3, %g3 + ldx [%g1 + %g3], %g1 save %sp, -128, %sp rdpr %tstate, %l6 rdpr %cwp, %l7 @@ -500,17 +508,6 @@ utrap: brz,pn %g1, etrap rdpr %tnpc, %l7 wrpr %g1, 0, %tnpc done -utrap_ill: - call bad_trap - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 - - /* XXX Here is stuff we still need to write... -DaveM XXX */ - .globl netbsd_syscall -netbsd_syscall: - retl - nop /* We need to carefully read the error status, ACK * the errors, prevent recursive traps, and pass the @@ -1001,7 +998,7 @@ dcpe_icpe_tl1_common: * %g3: scratch * %g4: AFSR * %g5: AFAR - * %g6: current thread ptr + * %g6: unused, will have current thread ptr after etrap * %g7: scratch */ __cheetah_log_error: @@ -1539,13 +1536,14 @@ ret_from_syscall: 1: b,pt %xcc, ret_sys_call ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 -sparc_exit: wrpr %g0, (PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV), %pstate +sparc_exit: rdpr %pstate, %g2 + wrpr %g2, PSTATE_IE, %pstate rdpr %otherwin, %g1 rdpr %cansave, %g3 add %g3, %g1, %g3 wrpr %g3, 0x0, %cansave wrpr %g0, 0x0, %otherwin - wrpr %g0, (PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE), %pstate + wrpr %g2, 0x0, %pstate ba,pt %xcc, sys_exit stb %g0, [%g6 + TI_WSAVED] @@ -1690,3 +1688,138 @@ __flushw_user: restore %g0, %g0, %g0 2: retl nop + +#ifdef CONFIG_SMP + .globl hard_smp_processor_id +hard_smp_processor_id: +#endif + .globl real_hard_smp_processor_id +real_hard_smp_processor_id: + __GET_CPUID(%o0) + retl + nop + + /* %o0: devhandle + * %o1: devino + * + * returns %o0: sysino + */ + .globl sun4v_devino_to_sysino +sun4v_devino_to_sysino: + mov HV_FAST_INTR_DEVINO2SYSINO, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * + * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + */ + .globl sun4v_intr_getenabled +sun4v_intr_getenabled: + mov HV_FAST_INTR_GETENABLED, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + */ + .globl sun4v_intr_setenabled +sun4v_intr_setenabled: + mov HV_FAST_INTR_SETENABLED, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: sysino + * + * returns %o0: intr_state (HV_INTR_STATE_*) + */ + .globl sun4v_intr_getstate +sun4v_intr_getstate: + mov HV_FAST_INTR_GETSTATE, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * %o1: intr_state (HV_INTR_STATE_*) + */ + .globl sun4v_intr_setstate +sun4v_intr_setstate: + mov HV_FAST_INTR_SETSTATE, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: sysino + * + * returns %o0: cpuid + */ + .globl sun4v_intr_gettarget +sun4v_intr_gettarget: + mov HV_FAST_INTR_GETTARGET, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: sysino + * %o1: cpuid + */ + .globl sun4v_intr_settarget +sun4v_intr_settarget: + mov HV_FAST_INTR_SETTARGET, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: type + * %o1: queue paddr + * %o2: num queue entries + * + * returns %o0: status + */ + .globl sun4v_cpu_qconf +sun4v_cpu_qconf: + mov HV_FAST_CPU_QCONF, %o5 + ta HV_FAST_TRAP + retl + nop + + /* returns %o0: status + */ + .globl sun4v_cpu_yield +sun4v_cpu_yield: + mov HV_FAST_CPU_YIELD, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: num cpus in cpu list + * %o1: cpu list paddr + * %o2: mondo block paddr + * + * returns %o0: status + */ + .globl sun4v_cpu_mondo_send +sun4v_cpu_mondo_send: + mov HV_FAST_CPU_MONDO_SEND, %o5 + ta HV_FAST_TRAP + retl + nop + + /* %o0: CPU ID + * + * returns %o0: -status if status non-zero, else + * %o0: cpu state as HV_CPU_STATE_* + */ + .globl sun4v_cpu_state +sun4v_cpu_state: + mov HV_FAST_CPU_STATE, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 1f + sub %g0, %o0, %o0 + mov %o1, %o0 +1: retl + nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 0d8eba2..1493838 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -31,6 +31,7 @@ .globl etrap, etrap_irq, etraptl1 etrap: rdpr %pil, %g2 etrap_irq: + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -54,7 +55,31 @@ etrap_irq: rd %y, %g3 stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y] - save %g2, -STACK_BIAS, %sp ! Ordering here is critical + + rdpr %cansave, %g1 + brnz,pt %g1, etrap_save + nop + + rdpr %cwp, %g1 + add %g1, 2, %g1 + wrpr %g1, %cwp + be,pt %xcc, etrap_user_spill + mov ASI_AIUP, %g3 + + rdpr %otherwin, %g3 + brz %g3, etrap_kernel_spill + mov ASI_AIUS, %g3 + +etrap_user_spill: + + wr %g3, 0x0, %asi + ldx [%g6 + TI_FLAGS], %g3 + and %g3, _TIF_32BIT, %g3 + brnz,pt %g3, etrap_user_spill_32bit + nop + ba,a,pt %xcc, etrap_user_spill_64bit + +etrap_save: save %g2, -STACK_BIAS, %sp mov %g6, %l6 bne,pn %xcc, 3f @@ -70,42 +95,56 @@ etrap_irq: wrpr %g2, 0, %wstate sethi %hi(sparc64_kern_pri_context), %g2 ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 - stxa %g3, [%l4] ASI_DMMU - flush %l6 - wr %g0, ASI_AIUS, %asi -2: wrpr %g0, 0x0, %tl - mov %g4, %l4 + +661: stxa %g3, [%l4] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g3, [%l4] ASI_MMU + .previous + + sethi %hi(KERNBASE), %l4 + flush %l4 + mov ASI_AIUS, %l7 +2: mov %g4, %l4 mov %g5, %l5 + add %g7, 4, %l2 + + /* Go to trap time globals so we can save them. */ +661: wrpr %g0, ETRAP_PSTATE1, %pstate + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous - mov %g7, %l2 - wrpr %g0, ETRAP_PSTATE1, %pstate stx %g1, [%sp + PTREGS_OFF + PT_V9_G1] stx %g2, [%sp + PTREGS_OFF + PT_V9_G2] + sllx %l7, 24, %l7 stx %g3, [%sp + PTREGS_OFF + PT_V9_G3] + rdpr %cwp, %l0 stx %g4, [%sp + PTREGS_OFF + PT_V9_G4] stx %g5, [%sp + PTREGS_OFF + PT_V9_G5] stx %g6, [%sp + PTREGS_OFF + PT_V9_G6] - stx %g7, [%sp + PTREGS_OFF + PT_V9_G7] + or %l7, %l0, %l7 + sethi %hi(TSTATE_RMO | TSTATE_PEF), %l0 + or %l7, %l0, %l7 + wrpr %l2, %tnpc + wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate stx %i0, [%sp + PTREGS_OFF + PT_V9_I0] stx %i1, [%sp + PTREGS_OFF + PT_V9_I1] stx %i2, [%sp + PTREGS_OFF + PT_V9_I2] stx %i3, [%sp + PTREGS_OFF + PT_V9_I3] stx %i4, [%sp + PTREGS_OFF + PT_V9_I4] stx %i5, [%sp + PTREGS_OFF + PT_V9_I5] - stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] - stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] - wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 -#ifdef CONFIG_SMP - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif - jmpl %l2 + 0x4, %g0 - ldx [%g6 + TI_TASK], %g4 + stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) + ldx [%g6 + TI_TASK], %g4 + done -3: ldub [%l6 + TI_FPDEPTH], %l5 +3: mov ASI_P, %l7 + ldub [%l6 + TI_FPDEPTH], %l5 add %l6, TI_FPSAVED + 1, %l4 srl %l5, 1, %l3 add %l5, 2, %l5 @@ -125,6 +164,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. * 0x58 TL4's TT * 0x60 TL */ + TRAP_LOAD_THREAD_REG(%g6, %g1) sub %sp, ((4 * 8) * 4) + 8, %g2 rdpr %tl, %g1 @@ -148,6 +188,11 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. rdpr %tt, %g3 stx %g3, [%g2 + STACK_BIAS + 0x38] + sethi %hi(is_sun4v), %g3 + lduw [%g3 + %lo(is_sun4v)], %g3 + brnz,pn %g3, finish_tl1_capture + nop + wrpr %g0, 3, %tl rdpr %tstate, %g3 stx %g3, [%g2 + STACK_BIAS + 0x40] @@ -168,91 +213,20 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. rdpr %tt, %g3 stx %g3, [%g2 + STACK_BIAS + 0x78] - wrpr %g1, %tl stx %g1, [%g2 + STACK_BIAS + 0x80] +finish_tl1_capture: + wrpr %g0, 1, %tl +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(1) + .previous + rdpr %tstate, %g1 sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2 ba,pt %xcc, 1b andcc %g1, TSTATE_PRIV, %g0 - .align 64 - .globl scetrap -scetrap: rdpr %pil, %g2 - rdpr %tstate, %g1 - sllx %g2, 20, %g3 - andcc %g1, TSTATE_PRIV, %g0 - or %g1, %g3, %g1 - bne,pn %xcc, 1f - sub %sp, (STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS), %g2 - wrpr %g0, 7, %cleanwin - - sllx %g1, 51, %g3 - sethi %hi(TASK_REGOFF), %g2 - or %g2, %lo(TASK_REGOFF), %g2 - brlz,pn %g3, 1f - add %g6, %g2, %g2 - wr %g0, 0, %fprs -1: rdpr %tpc, %g3 - stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE] - - rdpr %tnpc, %g1 - stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC] - stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] - save %g2, -STACK_BIAS, %sp ! Ordering here is critical - mov %g6, %l6 - bne,pn %xcc, 2f - mov ASI_P, %l7 - rdpr %canrestore, %g3 - - rdpr %wstate, %g2 - wrpr %g0, 0, %canrestore - sll %g2, 3, %g2 - mov PRIMARY_CONTEXT, %l4 - wrpr %g3, 0, %otherwin - wrpr %g2, 0, %wstate - sethi %hi(sparc64_kern_pri_context), %g2 - ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 - stxa %g3, [%l4] ASI_DMMU - flush %l6 - - mov ASI_AIUS, %l7 -2: mov %g4, %l4 - mov %g5, %l5 - add %g7, 0x4, %l2 - wrpr %g0, ETRAP_PSTATE1, %pstate - stx %g1, [%sp + PTREGS_OFF + PT_V9_G1] - stx %g2, [%sp + PTREGS_OFF + PT_V9_G2] - sllx %l7, 24, %l7 - - stx %g3, [%sp + PTREGS_OFF + PT_V9_G3] - rdpr %cwp, %l0 - stx %g4, [%sp + PTREGS_OFF + PT_V9_G4] - stx %g5, [%sp + PTREGS_OFF + PT_V9_G5] - stx %g6, [%sp + PTREGS_OFF + PT_V9_G6] - stx %g7, [%sp + PTREGS_OFF + PT_V9_G7] - or %l7, %l0, %l7 - sethi %hi(TSTATE_RMO | TSTATE_PEF), %l0 - - or %l7, %l0, %l7 - wrpr %l2, %tnpc - wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate - stx %i0, [%sp + PTREGS_OFF + PT_V9_I0] - stx %i1, [%sp + PTREGS_OFF + PT_V9_I1] - stx %i2, [%sp + PTREGS_OFF + PT_V9_I2] - stx %i3, [%sp + PTREGS_OFF + PT_V9_I3] - stx %i4, [%sp + PTREGS_OFF + PT_V9_I4] - - stx %i5, [%sp + PTREGS_OFF + PT_V9_I5] - stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] - mov %l6, %g6 - stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] -#ifdef CONFIG_SMP - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif - ldx [%g6 + TI_TASK], %g4 - done - #undef TASK_REGOFF #undef ETRAP_PSTATE1 diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index b49dcd4..3eadac5 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -26,6 +26,7 @@ #include <asm/head.h> #include <asm/ttable.h> #include <asm/mmu.h> +#include <asm/cpudata.h> /* This section from from _start to sparc64_boot_end should fit into * 0x0000000000404000 to 0x0000000000408000. @@ -94,12 +95,17 @@ sparc64_boot: wrpr %g1, 0x0, %pstate ba,a,pt %xcc, 1f - .globl prom_finddev_name, prom_chosen_path - .globl prom_getprop_name, prom_mmu_name - .globl prom_callmethod_name, prom_translate_name + .globl prom_finddev_name, prom_chosen_path, prom_root_node + .globl prom_getprop_name, prom_mmu_name, prom_peer_name + .globl prom_callmethod_name, prom_translate_name, prom_root_compatible .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache .globl prom_boot_mapped_pc, prom_boot_mapping_mode .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low + .globl is_sun4v +prom_peer_name: + .asciz "peer" +prom_compatible_name: + .asciz "compatible" prom_finddev_name: .asciz "finddevice" prom_chosen_path: @@ -116,7 +122,13 @@ prom_map_name: .asciz "map" prom_unmap_name: .asciz "unmap" +prom_sun4v_name: + .asciz "sun4v" .align 4 +prom_root_compatible: + .skip 64 +prom_root_node: + .word 0 prom_mmu_ihandle_cache: .word 0 prom_boot_mapped_pc: @@ -128,8 +140,54 @@ prom_boot_mapping_phys_high: .xword 0 prom_boot_mapping_phys_low: .xword 0 +is_sun4v: + .word 0 1: rd %pc, %l0 + + mov (1b - prom_peer_name), %l1 + sub %l0, %l1, %l1 + mov 0, %l2 + + /* prom_root_node = prom_peer(0) */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "peer" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, 0 + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! prom root node + mov (1b - prom_root_node), %l1 + sub %l0, %l1, %l1 + stw %l4, [%l1] + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_compatible_name), %l2 + mov (1b - prom_root_compatible), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_getproperty(prom_root_node, "compatible", + * &prom_root_compatible, 64) + */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, prom_root_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_root_compatible + mov 64, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + mov (1b - prom_finddev_name), %l1 mov (1b - prom_chosen_path), %l2 mov (1b - prom_boot_mapped_pc), %l3 @@ -238,6 +296,27 @@ prom_boot_mapping_phys_low: add %sp, (192 + 128), %sp sparc64_boot_after_remap: + sethi %hi(prom_root_compatible), %g1 + or %g1, %lo(prom_root_compatible), %g1 + sethi %hi(prom_sun4v_name), %g7 + or %g7, %lo(prom_sun4v_name), %g7 + mov 5, %g3 +1: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 + bne,pn %icc, 2f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 1b + add %g1, 1, %g1 + + sethi %hi(is_sun4v), %g1 + or %g1, %lo(is_sun4v), %g1 + mov 1, %g7 + stw %g7, [%g1] + +2: + BRANCH_IF_SUN4V(g1, jump_to_sun4u_init) BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) ba,pt %xcc, spitfire_boot @@ -301,20 +380,58 @@ jump_to_sun4u_init: nop sun4u_init: + BRANCH_IF_SUN4V(g1, sun4v_init) + /* Set ctx 0 */ - mov PRIMARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU - membar #Sync + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_DMMU + membar #Sync - mov SECONDARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_DMMU membar #Sync - BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup) + ba,pt %xcc, sun4u_continue + nop + +sun4v_init: + /* Set ctx 0 */ + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + ba,pt %xcc, niagara_tlb_fixup + nop + +sun4u_continue: + BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup) ba,pt %xcc, spitfire_tlb_fixup nop +niagara_tlb_fixup: + mov 3, %g2 /* Set TLB type to hypervisor. */ + sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] + + /* Patch copy/clear ops. */ + call niagara_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara_patch_pageops + nop + + /* Patch TLB/cache ops. */ + call hypervisor_patch_cachetlbops + nop + + ba,pt %xcc, tlb_fixup_done + nop + cheetah_tlb_fixup: mov 2, %g2 /* Set TLB type to cheetah+. */ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) @@ -411,85 +528,55 @@ setup_trap_table: wrpr %g0, 15, %pil /* Make the firmware call to jump over to the Linux trap table. */ - call prom_set_trap_table - sethi %hi(sparc64_ttable_tl0), %o0 + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f + nop - /* Start using proper page size encodings in ctx register. */ - sethi %hi(sparc64_kern_pri_context), %g3 - ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 - mov PRIMARY_CONTEXT, %g1 - stxa %g2, [%g1] ASI_DMMU - membar #Sync + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD - /* The Linux trap handlers expect various trap global registers - * to be setup with some fixed values. So here we set these - * up very carefully. These globals are: - * - * Alternate Globals (PSTATE_AG): - * - * %g6 --> current_thread_info() - * - * MMU Globals (PSTATE_MG): - * - * %g1 --> TLB_SFSR - * %g2 --> ((_PAGE_VALID | _PAGE_SZ4MB | - * _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - * ^ 0xfffff80000000000) - * (this %g2 value is used for computing the PAGE_OFFSET kernel - * TLB entries quickly, the virtual address of the fault XOR'd - * with this %g2 value is the PTE to load into the TLB) - * %g3 --> VPTE_BASE_CHEETAH or VPTE_BASE_SPITFIRE + /* Compute physical address: * - * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()): - * - * %g6 --> __irq_work[smp_processor_id()] + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate - mov %o2, %g6 - -#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000) -#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - wrpr %o1, PSTATE_MG, %pstate - mov TSB_REG, %g1 - stxa %g0, [%g1] ASI_DMMU - membar #Sync - stxa %g0, [%g1] ASI_IMMU - membar #Sync - mov TLB_SFSR, %g1 - sethi %uhi(KERN_HIGHBITS), %g2 - or %g2, %ulo(KERN_HIGHBITS), %g2 - sllx %g2, 32, %g2 - or %g2, KERN_LOWBITS, %g2 - - BRANCH_IF_ANY_CHEETAH(g3,g7,8f) - ba,pt %xcc, 9f + call prom_set_trap_table_sun4v + sethi %hi(sparc64_ttable_tl0), %o0 + + ba,pt %xcc, 2f nop -8: - sethi %uhi(VPTE_BASE_CHEETAH), %g3 - or %g3, %ulo(VPTE_BASE_CHEETAH), %g3 - ba,pt %xcc, 2f - sllx %g3, 32, %g3 +1: call prom_set_trap_table + sethi %hi(sparc64_ttable_tl0), %o0 -9: - sethi %uhi(VPTE_BASE_SPITFIRE), %g3 - or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3 - sllx %g3, 32, %g3 + /* Start using proper page size encodings in ctx register. */ +2: sethi %hi(sparc64_kern_pri_context), %g3 + ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 -2: - clr %g7 -#undef KERN_HIGHBITS -#undef KERN_LOWBITS + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + membar #Sync /* Kill PROM timer */ sethi %hi(0x80000000), %o2 sllx %o2, 32, %o2 wr %o2, 0, %tick_cmpr - BRANCH_IF_ANY_CHEETAH(o2,o3,1f) + BRANCH_IF_SUN4V(o2, 1f) + BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) ba,pt %xcc, 2f nop @@ -502,7 +589,6 @@ setup_trap_table: 2: wrpr %g0, %g0, %wstate - wrpr %o1, 0x0, %pstate call init_irqwork_curcpu nop @@ -517,7 +603,7 @@ setup_trap_table: restore .globl setup_tba -setup_tba: /* i0 = is_starfire */ +setup_tba: save %sp, -192, %sp /* The boot processor is the only cpu which invokes this @@ -536,31 +622,35 @@ setup_tba: /* i0 = is_starfire */ restore sparc64_boot_end: -#include "systbls.S" #include "ktlb.S" +#include "tsb.S" #include "etrap.S" #include "rtrap.S" #include "winfixup.S" #include "entry.S" +#include "sun4v_tlb_miss.S" +#include "sun4v_ivec.S" /* * The following skip makes sure the trap table in ttable.S is aligned * on a 32K boundary as required by the v9 specs for TBA register. + * + * We align to a 32K boundary, then we have the 32K kernel TSB, + * then the 32K aligned trap table. */ 1: .skip 0x4000 + _start - 1b -#ifdef CONFIG_SBUS -/* This is just a hack to fool make depend config.h discovering - strategy: As the .S files below need config.h, but - make depend does not find it for them, we include config.h - in head.S */ -#endif + .globl swapper_tsb +swapper_tsb: + .skip (32 * 1024) ! 0x0000000000408000 #include "ttable.S" +#include "systbls.S" + .data .align 8 .globl prom_tba, tlb_type diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 233526b..8c93ba6 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -21,6 +21,7 @@ #include <linux/delay.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/bootmem.h> #include <asm/ptrace.h> #include <asm/processor.h> @@ -39,6 +40,7 @@ #include <asm/cache.h> #include <asm/cpudata.h> #include <asm/auxio.h> +#include <asm/head.h> #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -136,12 +138,48 @@ out_unlock: return 0; } +extern unsigned long real_hard_smp_processor_id(void); + +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) +{ + unsigned int tid; + + if (this_is_starfire) { + tid = starfire_translate(imap, cpuid); + tid <<= IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } else { + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + unsigned long ver; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_JBUS; + } else { + unsigned int a = cpuid & 0x1f; + unsigned int n = (cpuid >> 5) & 0x1f; + + tid = ((a << IMAP_AID_SHIFT) | + (n << IMAP_NID_SHIFT)); + tid &= (IMAP_AID_SAFARI | + IMAP_NID_SAFARI);; + } + } else { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } + } + + return tid; +} + /* Now these are always passed a true fully specified sun4u INO. */ void enable_irq(unsigned int irq) { struct ino_bucket *bucket = __bucket(irq); - unsigned long imap; - unsigned long tid; + unsigned long imap, cpuid; imap = bucket->imap; if (imap == 0UL) @@ -149,47 +187,38 @@ void enable_irq(unsigned int irq) preempt_disable(); - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - unsigned long ver; - - __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { - /* We set it to our JBUS ID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_JBUS_CONFIG)); - tid = ((tid & (0x1fUL<<17)) << 9); - tid &= IMAP_TID_JBUS; - } else { - /* We set it to our Safari AID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_SAFARI_CONFIG)); - tid = ((tid & (0x3ffUL<<17)) << 9); - tid &= IMAP_AID_SAFARI; - } - } else if (this_is_starfire == 0) { - /* We set it to our UPA MID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_UPA_CONFIG)); - tid = ((tid & UPA_CONFIG_MID) << 9); - tid &= IMAP_TID_UPA; + /* This gets the physical processor ID, even on uniprocessor, + * so we can always program the interrupt target correctly. + */ + cpuid = real_hard_smp_processor_id(); + + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(irq); + int err; + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk("sun4v_intr_settarget(%x,%lu): err(%d)\n", + ino, cpuid, err); + err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED); + if (err != HV_EOK) + printk("sun4v_intr_setenabled(%x): err(%d)\n", + ino, err); } else { - tid = (starfire_translate(imap, smp_processor_id()) << 26); - tid &= IMAP_TID_UPA; + unsigned int tid = sun4u_compute_tid(imap, cpuid); + + /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product + * of this SYSIO's preconfigured IGN in the SYSIO Control + * Register, the hardware just mirrors that value here. + * However for Graphics and UPA Slave devices the full + * IMAP_INR field can be set by the programmer here. + * + * Things like FFB can now be handled via the new IRQ + * mechanism. + */ + upa_writel(tid | IMAP_VALID, imap); } - /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product - * of this SYSIO's preconfigured IGN in the SYSIO Control - * Register, the hardware just mirrors that value here. - * However for Graphics and UPA Slave devices the full - * IMAP_INR field can be set by the programmer here. - * - * Things like FFB can now be handled via the new IRQ mechanism. - */ - upa_writel(tid | IMAP_VALID, imap); - preempt_enable(); } @@ -201,16 +230,26 @@ void disable_irq(unsigned int irq) imap = bucket->imap; if (imap != 0UL) { - u32 tmp; + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(irq); + int err; + + err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); + if (err != HV_EOK) + printk("sun4v_intr_setenabled(%x): " + "err(%d)\n", ino, err); + } else { + u32 tmp; - /* NOTE: We do not want to futz with the IRQ clear registers - * and move the state to IDLE, the SCSI code does call - * disable_irq() to assure atomicity in the queue cmd - * SCSI adapter driver code. Thus we'd lose interrupts. - */ - tmp = upa_readl(imap); - tmp &= ~IMAP_VALID; - upa_writel(tmp, imap); + /* NOTE: We do not want to futz with the IRQ clear registers + * and move the state to IDLE, the SCSI code does call + * disable_irq() to assure atomicity in the queue cmd + * SCSI adapter driver code. Thus we'd lose interrupts. + */ + tmp = upa_readl(imap); + tmp &= ~IMAP_VALID; + upa_writel(tmp, imap); + } } } @@ -248,6 +287,8 @@ unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long return __irq(&pil0_dummy_bucket); } + BUG_ON(tlb_type == hypervisor); + /* RULE: Both must be specified in all other cases. */ if (iclr == 0UL || imap == 0UL) { prom_printf("Invalid build_irq %d %d %016lx %016lx\n", @@ -275,12 +316,11 @@ unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long goto out; } - bucket->irq_info = kmalloc(sizeof(struct irq_desc), GFP_ATOMIC); + bucket->irq_info = kzalloc(sizeof(struct irq_desc), GFP_ATOMIC); if (!bucket->irq_info) { prom_printf("IRQ: Error, kmalloc(irq_desc) failed.\n"); prom_halt(); } - memset(bucket->irq_info, 0, sizeof(struct irq_desc)); /* Ok, looks good, set it up. Don't touch the irq_chain or * the pending flag. @@ -294,6 +334,37 @@ out: return __irq(bucket); } +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags) +{ + struct ino_bucket *bucket; + unsigned long sysino; + + sysino = sun4v_devino_to_sysino(devhandle, devino); + + bucket = &ivector_table[sysino]; + + /* Catch accidental accesses to these things. IMAP/ICLR handling + * is done by hypervisor calls on sun4v platforms, not by direct + * register accesses. + * + * But we need to make them look unique for the disable_irq() logic + * in free_irq(). + */ + bucket->imap = ~0UL - sysino; + bucket->iclr = ~0UL - sysino; + + bucket->pil = pil; + bucket->flags = flags; + + bucket->irq_info = kzalloc(sizeof(struct irq_desc), GFP_ATOMIC); + if (!bucket->irq_info) { + prom_printf("IRQ: Error, kmalloc(irq_desc) failed.\n"); + prom_halt(); + } + + return __irq(bucket); +} + static void atomic_bucket_insert(struct ino_bucket *bucket) { unsigned long pstate; @@ -482,7 +553,6 @@ void free_irq(unsigned int irq, void *dev_id) bucket = __bucket(irq); if (bucket != &pil0_dummy_bucket) { struct irq_desc *desc = bucket->irq_info; - unsigned long imap = bucket->imap; int ent, i; for (i = 0; i < MAX_IRQ_DESC_ACTION; i++) { @@ -495,6 +565,8 @@ void free_irq(unsigned int irq, void *dev_id) } if (!desc->action_active_mask) { + unsigned long imap = bucket->imap; + /* This unique interrupt source is now inactive. */ bucket->flags &= ~IBF_ACTIVE; @@ -592,7 +664,18 @@ static void process_bucket(int irq, struct ino_bucket *bp, struct pt_regs *regs) break; } if (bp->pil != 0) { - upa_writel(ICLR_IDLE, bp->iclr); + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(bp); + int err; + + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk("sun4v_intr_setstate(%x): " + "err(%d)\n", ino, err); + } else { + upa_writel(ICLR_IDLE, bp->iclr); + } + /* Test and add entropy */ if (random & SA_SAMPLE_RANDOM) add_interrupt_randomness(irq); @@ -694,7 +777,7 @@ irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) val = readb(auxio_register); val |= AUXIO_AUX1_FTCNT; writeb(val, auxio_register); - val &= AUXIO_AUX1_FTCNT; + val &= ~AUXIO_AUX1_FTCNT; writeb(val, auxio_register); doing_pdma = 0; @@ -727,25 +810,23 @@ EXPORT_SYMBOL(probe_irq_off); static int retarget_one_irq(struct irqaction *p, int goal_cpu) { struct ino_bucket *bucket = get_ino_in_irqaction(p) + ivector_table; - unsigned long imap = bucket->imap; - unsigned int tid; while (!cpu_online(goal_cpu)) { if (++goal_cpu >= NR_CPUS) goal_cpu = 0; } - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - tid = goal_cpu << 26; - tid &= IMAP_AID_SAFARI; - } else if (this_is_starfire == 0) { - tid = goal_cpu << 26; - tid &= IMAP_TID_UPA; + if (tlb_type == hypervisor) { + unsigned int ino = __irq_ino(bucket); + + sun4v_intr_settarget(ino, goal_cpu); + sun4v_intr_setenabled(ino, HV_INTR_ENABLED); } else { - tid = (starfire_translate(imap, goal_cpu) << 26); - tid &= IMAP_TID_UPA; + unsigned long imap = bucket->imap; + unsigned int tid = sun4u_compute_tid(imap, goal_cpu); + + upa_writel(tid | IMAP_VALID, imap); } - upa_writel(tid | IMAP_VALID, imap); do { if (++goal_cpu >= NR_CPUS) @@ -848,33 +929,114 @@ static void kill_prom_timer(void) void init_irqwork_curcpu(void) { - register struct irq_work_struct *workp asm("o2"); - register unsigned long tmp asm("o3"); int cpu = hard_smp_processor_id(); - memset(__irq_work + cpu, 0, sizeof(*workp)); - - /* Make sure we are called with PSTATE_IE disabled. */ - __asm__ __volatile__("rdpr %%pstate, %0\n\t" - : "=r" (tmp)); - if (tmp & PSTATE_IE) { - prom_printf("BUG: init_irqwork_curcpu() called with " - "PSTATE_IE enabled, bailing.\n"); - __asm__ __volatile__("mov %%i7, %0\n\t" - : "=r" (tmp)); - prom_printf("BUG: Called from %lx\n", tmp); + memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct)); +} + +static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type) +{ + unsigned long num_entries = 128; + unsigned long status; + + status = sun4v_cpu_qconf(type, paddr, num_entries); + if (status != HV_EOK) { + prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, " + "err %lu\n", type, paddr, num_entries, status); prom_halt(); } +} - /* Set interrupt globals. */ - workp = &__irq_work[cpu]; - __asm__ __volatile__( - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate\n\t" - "mov %2, %%g6\n\t" - "wrpr %0, 0x0, %%pstate\n\t" - : "=&r" (tmp) - : "i" (PSTATE_IG), "r" (workp)); +static void __cpuinit sun4v_register_mondo_queues(int this_cpu) +{ + struct trap_per_cpu *tb = &trap_block[this_cpu]; + + register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO); + register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO); + register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR); + register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR); +} + +static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, int use_bootmem) +{ + void *page; + + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page = (void *) get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); + prom_halt(); + } + + *pa_ptr = __pa(page); +} + +static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, int use_bootmem) +{ + void *page; + + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page = (void *) get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); + prom_halt(); + } + + *pa_ptr = __pa(page); +} + +static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem) +{ +#ifdef CONFIG_SMP + void *page; + + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page = (void *) get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_halt(); + } + + tb->cpu_mondo_block_pa = __pa(page); + tb->cpu_list_pa = __pa(page + 64); +#endif +} + +/* Allocate and register the mondo and error queues for this cpu. */ +void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load) +{ + struct trap_per_cpu *tb = &trap_block[cpu]; + + if (alloc) { + alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem); + alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem); + alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem); + alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem); + alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem); + alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem); + + init_cpu_send_mondo_info(tb, use_bootmem); + } + + if (load) { + if (cpu != hard_smp_processor_id()) { + prom_printf("SUN4V: init mondo on cpu %d not %d\n", + cpu, hard_smp_processor_id()); + prom_halt(); + } + sun4v_register_mondo_queues(cpu); + } } /* Only invoked on boot processor. */ @@ -884,6 +1046,9 @@ void __init init_IRQ(void) kill_prom_timer(); memset(&ivector_table[0], 0, sizeof(ivector_table)); + if (tlb_type == hypervisor) + sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1); + /* We need to clear any IRQ's pending in the soft interrupt * registers, a spurious one could be left around from the * PROM timer which we just disabled. diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S deleted file mode 100644 index 4951ff8..0000000 --- a/arch/sparc64/kernel/itlb_base.S +++ /dev/null @@ -1,79 +0,0 @@ -/* $Id: itlb_base.S,v 1.12 2002/02/09 19:49:30 davem Exp $ - * itlb_base.S: Front end to ITLB miss replacement strategy. - * This is included directly into the trap table. - * - * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#if PAGE_SHIFT == 13 -/* - * To compute vpte offset, we need to do ((addr >> 13) << 3), - * which can be optimized to (addr >> 10) if bits 10/11/12 can - * be guaranteed to be 0 ... mmu_context.h does guarantee this - * by only using 10 bits in the hwcontext value. - */ -#define CREATE_VPTE_OFFSET1(r1, r2) \ - srax r1, 10, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) nop -#else /* PAGE_SHIFT */ -#define CREATE_VPTE_OFFSET1(r1, r2) \ - srax r1, PAGE_SHIFT, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) \ - sllx r2, 3, r2 -#endif /* PAGE_SHIFT */ - - -/* Ways we can get here: - * - * 1) Nucleus instruction misses from module code. - * 2) All user instruction misses. - * - * All real page faults merge their code paths to the - * sparc64_realfault_common label below. - */ - -/* ITLB ** ICACHE line 1: Quick user TLB misses */ - mov TLB_SFSR, %g1 - ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS - CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset - CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset - ldxa [%g3 + %g6] ASI_P, %g5 ! Load VPTE -1: brgez,pn %g5, 3f ! Not valid, branch out - sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot - andcc %g5, %g4, %g0 ! Executable? - -/* ITLB ** ICACHE line 2: Real faults */ - be,pn %xcc, 3f ! Nope, branch. - nop ! Delay-slot -2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB - retry ! Trap return -3: rdpr %pstate, %g4 ! Move into alt-globals - wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tpc, %g5 ! And load faulting VA - mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB - -/* ITLB ** ICACHE line 3: Finish faults */ -sparc64_realfault_common: ! Called by dtlb_miss - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] - ba,pt %xcc, etrap ! Save state -1: rd %pc, %g7 ! ... - call do_sparc64_fault ! Call fault handler - add %sp, PTREGS_OFF, %o0! Compute pt_regs arg - ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state - nop - -/* ITLB ** ICACHE line 4: Window fixups */ -winfix_trampoline: - rdpr %tpc, %g3 ! Prepare winfixup TNPC - or %g3, 0x7c, %g3 ! Compute branch offset - wrpr %g3, %tnpc ! Write it into TNPC - done ! Do it to it - nop - nop - nop - nop - -#undef CREATE_VPTE_OFFSET1 -#undef CREATE_VPTE_OFFSET2 diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S new file mode 100644 index 0000000..ad46e20 --- /dev/null +++ b/arch/sparc64/kernel/itlb_miss.S @@ -0,0 +1,39 @@ +/* ITLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context + brz,pn %g5, kvmap_itlb ! Context 0 processing + srlx %g6, 22, %g6 ! Delay slot + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + +/* ITLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_itlb ! Miss + mov FAULT_CODE_ITLB, %g3 + andcc %g5, _PAGE_EXEC_4U, %g0 ! Executable? + be,pn %xcc, tsb_do_fault + nop ! Delay slot, fill me + stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB + retry ! Trap done + nop + +/* ITLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* ITLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index d9244d3..31da1e5 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -4,191 +4,276 @@ * Copyright (C) 1996 Eddie C. Dost (ecd@brainaid.de) * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) * Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz) -*/ + */ #include <linux/config.h> #include <asm/head.h> #include <asm/asi.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/tsb.h> .text .align 32 -/* - * On a second level vpte miss, check whether the original fault is to the OBP - * range (note that this is only possible for instruction miss, data misses to - * obp range do not use vpte). If so, go back directly to the faulting address. - * This is because we want to read the tpc, otherwise we have no way of knowing - * the 8k aligned faulting address if we are using >8k kernel pagesize. This - * also ensures no vpte range addresses are dropped into tlb while obp is - * executing (see inherit_locked_prom_mappings() rant). - */ -sparc64_vpte_nucleus: - /* Note that kvmap below has verified that the address is - * in the range MODULES_VADDR --> VMALLOC_END already. So - * here we need only check if it is an OBP address or not. +kvmap_itlb: + /* g6: TAG TARGET */ + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + + /* sun4v_itlb_miss branches here with the missing virtual + * address already loaded into %g4 */ +kvmap_itlb_4v: + +kvmap_itlb_nonlinear: + /* Catch kernel NULL pointer calls. */ + sethi %hi(PAGE_SIZE), %g5 + cmp %g4, %g5 + bleu,pn %xcc, kvmap_dtlb_longpath + nop + + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load) + +kvmap_itlb_tsb_miss: sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 - blu,pn %xcc, kern_vpte + blu,pn %xcc, kvmap_itlb_vmalloc_addr mov 0x1, %g5 sllx %g5, 32, %g5 cmp %g4, %g5 - blu,pn %xcc, vpte_insn_obp + blu,pn %xcc, kvmap_itlb_obp nop - /* These two instructions are patched by paginig_init(). */ -kern_vpte: - sethi %hi(swapper_pgd_zero), %g5 - lduw [%g5 + %lo(swapper_pgd_zero)], %g5 +kvmap_itlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) - /* With kernel PGD in %g5, branch back into dtlb_backend. */ - ba,pt %xcc, sparc64_kpte_continue - andn %g1, 0x3, %g1 /* Finish PMD offset adjustment. */ + KTSB_LOCK_TAG(%g1, %g2, %g7) -vpte_noent: - /* Restore previous TAG_ACCESS, %g5 is zero, and we will - * skip over the trap instruction so that the top level - * TLB miss handler will thing this %g5 value is just an - * invalid PTE, thus branching to full fault processing. - */ - mov TLB_SFSR, %g1 - stxa %g4, [%g1 + %g1] ASI_DMMU - done - -vpte_insn_obp: - /* Behave as if we are at TL0. */ - wrpr %g0, 1, %tl - rdpr %tpc, %g4 /* Find original faulting iaddr */ - srlx %g4, 13, %g4 /* Throw out context bits */ - sllx %g4, 13, %g4 /* g4 has vpn + ctx0 now */ - - /* Restore previous TAG_ACCESS. */ - mov TLB_SFSR, %g1 - stxa %g4, [%g1 + %g1] ASI_IMMU - - sethi %hi(prom_trans), %g5 - or %g5, %lo(prom_trans), %g5 - -1: ldx [%g5 + 0x00], %g6 ! base - brz,a,pn %g6, longpath ! no more entries, fail - mov TLB_SFSR, %g1 ! and restore %g1 - ldx [%g5 + 0x08], %g1 ! len - add %g6, %g1, %g1 ! end - cmp %g6, %g4 - bgu,pt %xcc, 2f - cmp %g4, %g1 - bgeu,pt %xcc, 2f - ldx [%g5 + 0x10], %g1 ! PTE - - /* TLB load, restore %g1, and return from trap. */ - sub %g4, %g6, %g6 - add %g1, %g6, %g5 - mov TLB_SFSR, %g1 - stxa %g5, [%g0] ASI_ITLB_DATA_IN - retry + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_itlb_longpath + KTSB_STORE(%g1, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ -2: ba,pt %xcc, 1b - add %g5, (3 * 8), %g5 ! next entry - -kvmap_do_obp: - sethi %hi(prom_trans), %g5 - or %g5, %lo(prom_trans), %g5 - srlx %g4, 13, %g4 - sllx %g4, 13, %g4 - -1: ldx [%g5 + 0x00], %g6 ! base - brz,a,pn %g6, longpath ! no more entries, fail - mov TLB_SFSR, %g1 ! and restore %g1 - ldx [%g5 + 0x08], %g1 ! len - add %g6, %g1, %g1 ! end - cmp %g6, %g4 - bgu,pt %xcc, 2f - cmp %g4, %g1 - bgeu,pt %xcc, 2f - ldx [%g5 + 0x10], %g1 ! PTE - - /* TLB load, restore %g1, and return from trap. */ - sub %g4, %g6, %g6 - add %g1, %g6, %g5 - mov TLB_SFSR, %g1 - stxa %g5, [%g0] ASI_DTLB_DATA_IN +kvmap_itlb_load: + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 -2: ba,pt %xcc, 1b - add %g5, (3 * 8), %g5 ! next entry +kvmap_itlb_longpath: + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + nop + .previous + + rdpr %tpc, %g5 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_ITLB, %g4 + +kvmap_itlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_itlb_load + nop + +kvmap_dtlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_dtlb_load + nop -/* - * On a first level data miss, check whether this is to the OBP range (note - * that such accesses can be made by prom, as well as by kernel using - * prom_getproperty on "address"), and if so, do not use vpte access ... - * rather, use information saved during inherit_prom_mappings() using 8k - * pagesize. - */ .align 32 -kvmap: - brgez,pn %g4, kvmap_nonlinear +kvmap_dtlb_tsb4m_load: + KTSB_LOCK_TAG(%g1, %g2, %g7) + KTSB_WRITE(%g1, %g5, %g6) + ba,pt %xcc, kvmap_dtlb_load nop -#ifdef CONFIG_DEBUG_PAGEALLOC +kvmap_dtlb: + /* %g6: TAG TARGET */ + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + + /* sun4v_dtlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +kvmap_dtlb_4v: + brgez,pn %g4, kvmap_dtlb_nonlinear + nop + + /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ + KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) + + /* TSB entry address left in %g1, lookup linear PTE. + * Must preserve %g1 and %g6 (TAG). + */ +kvmap_dtlb_tsb4m_miss: + sethi %hi(kpte_linear_bitmap), %g2 + or %g2, %lo(kpte_linear_bitmap), %g2 + + /* Clear the PAGE_OFFSET top virtual bits, then shift + * down to get a 256MB physical address index. + */ + sllx %g4, 21, %g5 + mov 1, %g7 + srlx %g5, 21 + 28, %g5 + + /* Don't try this at home kids... this depends upon srlx + * only taking the low 6 bits of the shift count in %g5. + */ + sllx %g7, %g5, %g7 + + /* Divide by 64 to get the offset into the bitmask. */ + srlx %g5, 6, %g5 + sllx %g5, 3, %g5 + + /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ + ldx [%g2 + %g5], %g2 + andcc %g2, %g7, %g0 + sethi %hi(kern_linear_pte_xor), %g5 + or %g5, %lo(kern_linear_pte_xor), %g5 + bne,a,pt %xcc, 1f + add %g5, 8, %g5 + +1: ldx [%g5], %g2 + .globl kvmap_linear_patch kvmap_linear_patch: -#endif - ba,pt %xcc, kvmap_load + ba,pt %xcc, kvmap_dtlb_tsb4m_load xor %g2, %g4, %g5 -#ifdef CONFIG_DEBUG_PAGEALLOC - sethi %hi(swapper_pg_dir), %g5 - or %g5, %lo(swapper_pg_dir), %g5 - sllx %g4, 64 - (PGDIR_SHIFT + PGDIR_BITS), %g6 - srlx %g6, 64 - PAGE_SHIFT, %g6 - andn %g6, 0x3, %g6 - lduw [%g5 + %g6], %g5 - brz,pn %g5, longpath - sllx %g4, 64 - (PMD_SHIFT + PMD_BITS), %g6 - srlx %g6, 64 - PAGE_SHIFT, %g6 - sllx %g5, 11, %g5 - andn %g6, 0x3, %g6 - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g4, 64 - PMD_SHIFT, %g6 - srlx %g6, 64 - PAGE_SHIFT, %g6 - sllx %g5, 11, %g5 - andn %g6, 0x7, %g6 - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath +kvmap_dtlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_dtlb_longpath + KTSB_STORE(%g1, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ + +kvmap_dtlb_load: + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 + +kvmap_dtlb_nonlinear: + /* Catch kernel NULL pointer derefs. */ + sethi %hi(PAGE_SIZE), %g5 + cmp %g4, %g5 + bleu,pn %xcc, kvmap_dtlb_longpath nop - ba,a,pt %xcc, kvmap_load -#endif -kvmap_nonlinear: + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) + +kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 - blu,pn %xcc, longpath + blu,pn %xcc, kvmap_dtlb_longpath mov (VMALLOC_END >> 24), %g5 sllx %g5, 24, %g5 cmp %g4, %g5 - bgeu,pn %xcc, longpath + bgeu,pn %xcc, kvmap_dtlb_longpath nop kvmap_check_obp: sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 - blu,pn %xcc, kvmap_vmalloc_addr + blu,pn %xcc, kvmap_dtlb_vmalloc_addr mov 0x1, %g5 sllx %g5, 32, %g5 cmp %g4, %g5 - blu,pn %xcc, kvmap_do_obp + blu,pn %xcc, kvmap_dtlb_obp nop - -kvmap_vmalloc_addr: - /* If we get here, a vmalloc addr was accessed, load kernel VPTE. */ - ldxa [%g3 + %g6] ASI_N, %g5 - brgez,pn %g5, longpath + ba,pt %xcc, kvmap_dtlb_vmalloc_addr nop -kvmap_load: - /* PTE is valid, load into TLB and return from trap. */ - stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB - retry +kvmap_dtlb_longpath: + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g5 + .previous + + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g5 + .section .sun4v_2insn_patch, "ax" + .word 661b + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 + nop + .previous + + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline + nop diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 2ff7c32..95ffa94 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -188,6 +188,7 @@ extern void psycho_init(int, char *); extern void schizo_init(int, char *); extern void schizo_plus_init(int, char *); extern void tomatillo_init(int, char *); +extern void sun4v_pci_init(int, char *); static struct { char *model_name; @@ -204,6 +205,7 @@ static struct { { "pci108e,8002", schizo_plus_init }, { "SUNW,tomatillo", tomatillo_init }, { "pci108e,a801", tomatillo_init }, + { "SUNW,sun4v-pci", sun4v_pci_init }, }; #define PCI_NUM_CONTROLLER_TYPES (sizeof(pci_controller_table) / \ sizeof(pci_controller_table[0])) @@ -283,6 +285,12 @@ int __init pcic_present(void) return pci_controller_scan(pci_is_controller); } +struct pci_iommu_ops *pci_iommu_ops; +EXPORT_SYMBOL(pci_iommu_ops); + +extern struct pci_iommu_ops pci_sun4u_iommu_ops, + pci_sun4v_iommu_ops; + /* Find each controller in the system, attach and initialize * software state structure for each and link into the * pci_controller_root. Setup the controller enough such @@ -290,6 +298,11 @@ int __init pcic_present(void) */ static void __init pci_controller_probe(void) { + if (tlb_type == hypervisor) + pci_iommu_ops = &pci_sun4v_iommu_ops; + else + pci_iommu_ops = &pci_sun4u_iommu_ops; + printk("PCI: Probing for controllers.\n"); pci_controller_scan(pci_controller_init); diff --git a/arch/sparc64/kernel/pci_common.c b/arch/sparc64/kernel/pci_common.c index 58310aa..33dedb1 100644 --- a/arch/sparc64/kernel/pci_common.c +++ b/arch/sparc64/kernel/pci_common.c @@ -39,6 +39,8 @@ static int __init find_device_prom_node(struct pci_pbm_info *pbm, { int node; + *nregs = 0; + /* * Return the PBM's PROM node in case we are it's PCI device, * as the PBM's reg property is different to standard PCI reg @@ -51,10 +53,8 @@ static int __init find_device_prom_node(struct pci_pbm_info *pbm, pdev->device == PCI_DEVICE_ID_SUN_SCHIZO || pdev->device == PCI_DEVICE_ID_SUN_TOMATILLO || pdev->device == PCI_DEVICE_ID_SUN_SABRE || - pdev->device == PCI_DEVICE_ID_SUN_HUMMINGBIRD)) { - *nregs = 0; + pdev->device == PCI_DEVICE_ID_SUN_HUMMINGBIRD)) return bus_prom_node; - } node = prom_getchild(bus_prom_node); while (node != 0) { @@ -541,135 +541,183 @@ void __init pci_assign_unassigned(struct pci_pbm_info *pbm, pci_assign_unassigned(pbm, bus); } -static int __init pci_intmap_match(struct pci_dev *pdev, unsigned int *interrupt) +static inline unsigned int pci_slot_swivel(struct pci_pbm_info *pbm, + struct pci_dev *toplevel_pdev, + struct pci_dev *pdev, + unsigned int interrupt) { - struct linux_prom_pci_intmap bridge_local_intmap[PROM_PCIIMAP_MAX], *intmap; - struct linux_prom_pci_intmask bridge_local_intmask, *intmask; - struct pcidev_cookie *dev_pcp = pdev->sysdata; - struct pci_pbm_info *pbm = dev_pcp->pbm; - struct linux_prom_pci_registers *pregs = dev_pcp->prom_regs; - unsigned int hi, mid, lo, irq; - int i, num_intmap, map_slot; + unsigned int ret; - intmap = &pbm->pbm_intmap[0]; - intmask = &pbm->pbm_intmask; - num_intmap = pbm->num_pbm_intmap; - map_slot = 0; + if (unlikely(interrupt < 1 || interrupt > 4)) { + printk("%s: Device %s interrupt value of %u is strange.\n", + pbm->name, pci_name(pdev), interrupt); + return interrupt; + } - /* If we are underneath a PCI bridge, use PROM register - * property of the parent bridge which is closest to - * the PBM. - * - * However if that parent bridge has interrupt map/mask - * properties of its own we use the PROM register property - * of the next child device on the path to PDEV. - * - * In detail the two cases are (note that the 'X' below is the - * 'next child on the path to PDEV' mentioned above): - * - * 1) PBM --> PCI bus lacking int{map,mask} --> X ... PDEV - * - * Here we use regs of 'PCI bus' device. - * - * 2) PBM --> PCI bus with int{map,mask} --> X ... PDEV - * - * Here we use regs of 'X'. Note that X can be PDEV. - */ - if (pdev->bus->number != pbm->pci_first_busno) { - struct pcidev_cookie *bus_pcp, *regs_pcp; - struct pci_dev *bus_dev, *regs_dev; - int plen; + ret = ((interrupt - 1 + (PCI_SLOT(pdev->devfn) & 3)) & 3) + 1; + + printk("%s: %s IRQ Swivel %s [%x:%x] -> [%x]\n", + pbm->name, pci_name(toplevel_pdev), pci_name(pdev), + interrupt, PCI_SLOT(pdev->devfn), ret); + + return ret; +} + +static inline unsigned int pci_apply_intmap(struct pci_pbm_info *pbm, + struct pci_dev *toplevel_pdev, + struct pci_dev *pbus, + struct pci_dev *pdev, + unsigned int interrupt, + unsigned int *cnode) +{ + struct linux_prom_pci_intmap imap[PROM_PCIIMAP_MAX]; + struct linux_prom_pci_intmask imask; + struct pcidev_cookie *pbus_pcp = pbus->sysdata; + struct pcidev_cookie *pdev_pcp = pdev->sysdata; + struct linux_prom_pci_registers *pregs = pdev_pcp->prom_regs; + int plen, num_imap, i; + unsigned int hi, mid, lo, irq, orig_interrupt; + + *cnode = pbus_pcp->prom_node; + + plen = prom_getproperty(pbus_pcp->prom_node, "interrupt-map", + (char *) &imap[0], sizeof(imap)); + if (plen <= 0 || + (plen % sizeof(struct linux_prom_pci_intmap)) != 0) { + printk("%s: Device %s interrupt-map has bad len %d\n", + pbm->name, pci_name(pbus), plen); + goto no_intmap; + } + num_imap = plen / sizeof(struct linux_prom_pci_intmap); + + plen = prom_getproperty(pbus_pcp->prom_node, "interrupt-map-mask", + (char *) &imask, sizeof(imask)); + if (plen <= 0 || + (plen % sizeof(struct linux_prom_pci_intmask)) != 0) { + printk("%s: Device %s interrupt-map-mask has bad len %d\n", + pbm->name, pci_name(pbus), plen); + goto no_intmap; + } + + orig_interrupt = interrupt; - bus_dev = pdev->bus->self; - regs_dev = pdev; + hi = pregs->phys_hi & imask.phys_hi; + mid = pregs->phys_mid & imask.phys_mid; + lo = pregs->phys_lo & imask.phys_lo; + irq = interrupt & imask.interrupt; - while (bus_dev->bus && - bus_dev->bus->number != pbm->pci_first_busno) { - regs_dev = bus_dev; - bus_dev = bus_dev->bus->self; + for (i = 0; i < num_imap; i++) { + if (imap[i].phys_hi == hi && + imap[i].phys_mid == mid && + imap[i].phys_lo == lo && + imap[i].interrupt == irq) { + *cnode = imap[i].cnode; + interrupt = imap[i].cinterrupt; } + } - regs_pcp = regs_dev->sysdata; - pregs = regs_pcp->prom_regs; + printk("%s: %s MAP BUS %s DEV %s [%x] -> [%x]\n", + pbm->name, pci_name(toplevel_pdev), + pci_name(pbus), pci_name(pdev), + orig_interrupt, interrupt); - bus_pcp = bus_dev->sysdata; +no_intmap: + return interrupt; +} - /* But if the PCI bridge has it's own interrupt map - * and mask properties, use that and the regs of the - * PCI entity at the next level down on the path to the - * device. - */ - plen = prom_getproperty(bus_pcp->prom_node, "interrupt-map", - (char *) &bridge_local_intmap[0], - sizeof(bridge_local_intmap)); - if (plen != -1) { - intmap = &bridge_local_intmap[0]; - num_intmap = plen / sizeof(struct linux_prom_pci_intmap); - plen = prom_getproperty(bus_pcp->prom_node, - "interrupt-map-mask", - (char *) &bridge_local_intmask, - sizeof(bridge_local_intmask)); - if (plen == -1) { - printk("pci_intmap_match: Warning! Bridge has intmap " - "but no intmask.\n"); - printk("pci_intmap_match: Trying to recover.\n"); - return 0; - } +/* For each PCI bus on the way to the root: + * 1) If it has an interrupt-map property, apply it. + * 2) Else, swivel the interrupt number based upon the PCI device number. + * + * Return the "IRQ controller" node. If this is the PBM's device node, + * all interrupt translations are complete, else we should use that node's + * "reg" property to apply the PBM's "interrupt-{map,mask}" to the interrupt. + */ +static unsigned int __init pci_intmap_match_to_root(struct pci_pbm_info *pbm, + struct pci_dev *pdev, + unsigned int *interrupt) +{ + struct pci_dev *toplevel_pdev = pdev; + struct pcidev_cookie *toplevel_pcp = toplevel_pdev->sysdata; + unsigned int cnode = toplevel_pcp->prom_node; + + while (pdev->bus->number != pbm->pci_first_busno) { + struct pci_dev *pbus = pdev->bus->self; + struct pcidev_cookie *pcp = pbus->sysdata; + int plen; - if (pdev->bus->self != bus_dev) - map_slot = 1; + plen = prom_getproplen(pcp->prom_node, "interrupt-map"); + if (plen <= 0) { + *interrupt = pci_slot_swivel(pbm, toplevel_pdev, + pdev, *interrupt); + cnode = pcp->prom_node; } else { - pregs = bus_pcp->prom_regs; - map_slot = 1; + *interrupt = pci_apply_intmap(pbm, toplevel_pdev, + pbus, pdev, + *interrupt, &cnode); + + while (pcp->prom_node != cnode && + pbus->bus->number != pbm->pci_first_busno) { + pbus = pbus->bus->self; + pcp = pbus->sysdata; + } } - } + pdev = pbus; - if (map_slot) { - *interrupt = ((*interrupt - - 1 - + PCI_SLOT(pdev->devfn)) & 0x3) + 1; + if (cnode == pbm->prom_node) + break; } - hi = pregs->phys_hi & intmask->phys_hi; - mid = pregs->phys_mid & intmask->phys_mid; - lo = pregs->phys_lo & intmask->phys_lo; - irq = *interrupt & intmask->interrupt; - - for (i = 0; i < num_intmap; i++) { - if (intmap[i].phys_hi == hi && - intmap[i].phys_mid == mid && - intmap[i].phys_lo == lo && - intmap[i].interrupt == irq) { - *interrupt = intmap[i].cinterrupt; - printk("PCI-IRQ: Routing bus[%2x] slot[%2x] map[%d] to INO[%02x]\n", - pdev->bus->number, PCI_SLOT(pdev->devfn), - map_slot, *interrupt); - return 1; - } + return cnode; +} + +static int __init pci_intmap_match(struct pci_dev *pdev, unsigned int *interrupt) +{ + struct pcidev_cookie *dev_pcp = pdev->sysdata; + struct pci_pbm_info *pbm = dev_pcp->pbm; + struct linux_prom_pci_registers reg[PROMREG_MAX]; + unsigned int hi, mid, lo, irq; + int i, cnode, plen; + + cnode = pci_intmap_match_to_root(pbm, pdev, interrupt); + if (cnode == pbm->prom_node) + goto success; + + plen = prom_getproperty(cnode, "reg", (char *) reg, sizeof(reg)); + if (plen <= 0 || + (plen % sizeof(struct linux_prom_pci_registers)) != 0) { + printk("%s: OBP node %x reg property has bad len %d\n", + pbm->name, cnode, plen); + goto fail; } - /* We will run this code even if pbm->num_pbm_intmap is zero, just so - * we can apply the slot mapping to the PROM interrupt property value. - * So do not spit out these warnings in that case. - */ - if (num_intmap != 0) { - /* Print it both to OBP console and kernel one so that if bootup - * hangs here the user has the information to report. - */ - prom_printf("pci_intmap_match: bus %02x, devfn %02x: ", - pdev->bus->number, pdev->devfn); - prom_printf("IRQ [%08x.%08x.%08x.%08x] not found in interrupt-map\n", - pregs->phys_hi, pregs->phys_mid, pregs->phys_lo, *interrupt); - prom_printf("Please email this information to davem@redhat.com\n"); - - printk("pci_intmap_match: bus %02x, devfn %02x: ", - pdev->bus->number, pdev->devfn); - printk("IRQ [%08x.%08x.%08x.%08x] not found in interrupt-map\n", - pregs->phys_hi, pregs->phys_mid, pregs->phys_lo, *interrupt); - printk("Please email this information to davem@redhat.com\n"); + hi = reg[0].phys_hi & pbm->pbm_intmask.phys_hi; + mid = reg[0].phys_mid & pbm->pbm_intmask.phys_mid; + lo = reg[0].phys_lo & pbm->pbm_intmask.phys_lo; + irq = *interrupt & pbm->pbm_intmask.interrupt; + + for (i = 0; i < pbm->num_pbm_intmap; i++) { + struct linux_prom_pci_intmap *intmap; + + intmap = &pbm->pbm_intmap[i]; + + if (intmap->phys_hi == hi && + intmap->phys_mid == mid && + intmap->phys_lo == lo && + intmap->interrupt == irq) { + *interrupt = intmap->cinterrupt; + goto success; + } } +fail: return 0; + +success: + printk("PCI-IRQ: Routing bus[%2x] slot[%2x] to INO[%02x]\n", + pdev->bus->number, PCI_SLOT(pdev->devfn), + *interrupt); + return 1; } static void __init pdev_fixup_irq(struct pci_dev *pdev) @@ -703,16 +751,18 @@ static void __init pdev_fixup_irq(struct pci_dev *pdev) return; } - /* Fully specified already? */ - if (((prom_irq & PCI_IRQ_IGN) >> 6) == portid) { - pdev->irq = p->irq_build(pbm, pdev, prom_irq); - goto have_irq; - } + if (tlb_type != hypervisor) { + /* Fully specified already? */ + if (((prom_irq & PCI_IRQ_IGN) >> 6) == portid) { + pdev->irq = p->irq_build(pbm, pdev, prom_irq); + goto have_irq; + } - /* An onboard device? (bit 5 set) */ - if ((prom_irq & PCI_IRQ_INO) & 0x20) { - pdev->irq = p->irq_build(pbm, pdev, (portid << 6 | prom_irq)); - goto have_irq; + /* An onboard device? (bit 5 set) */ + if ((prom_irq & PCI_IRQ_INO) & 0x20) { + pdev->irq = p->irq_build(pbm, pdev, (portid << 6 | prom_irq)); + goto have_irq; + } } /* Can we find a matching entry in the interrupt-map? */ @@ -927,33 +977,30 @@ void pci_register_legacy_regions(struct resource *io_res, struct resource *p; /* VGA Video RAM. */ - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->name = "Video RAM area"; p->start = mem_res->start + 0xa0000UL; p->end = p->start + 0x1ffffUL; p->flags = IORESOURCE_BUSY; request_resource(mem_res, p); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->name = "System ROM"; p->start = mem_res->start + 0xf0000UL; p->end = p->start + 0xffffUL; p->flags = IORESOURCE_BUSY; request_resource(mem_res, p); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->name = "Video ROM"; p->start = mem_res->start + 0xc0000UL; p->end = p->start + 0x7fffUL; diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index a11910b..8efbc13 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -139,12 +139,11 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, /* Allocate and initialize the free area map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kmalloc(sz, GFP_KERNEL); + iommu->arena.map = kzalloc(sz, GFP_KERNEL); if (!iommu->arena.map) { prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); prom_halt(); } - memset(iommu->arena.map, 0, sz); iommu->arena.limit = num_tsb_entries; /* Allocate and initialize the dummy page which we @@ -219,7 +218,7 @@ static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx) * DMA for PCI device PDEV. Return non-NULL cpu-side address if * successful and set *DMA_ADDRP to the PCI side dma address. */ -void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) +static void *pci_4u_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -267,7 +266,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad } /* Free and unmap a consistent DMA translation. */ -void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) +static void pci_4u_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -294,7 +293,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ /* Map a single buffer at PTR of SZ bytes for PCI DMA * in streaming mode. */ -dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction) +static dma_addr_t pci_4u_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -415,7 +414,7 @@ do_flush_sync: } /* Unmap a single streaming mode DMA translation. */ -void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +static void pci_4u_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -548,7 +547,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, * When making changes here, inspect the assembly output. I was having * hard time to kepp this routine out of using stack slots for holding variables. */ -int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +static int pci_4u_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -562,9 +561,9 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int /* Fast path single entry scatterlists. */ if (nelems == 1) { sglist->dma_address = - pci_map_single(pdev, - (page_address(sglist->page) + sglist->offset), - sglist->length, direction); + pci_4u_map_single(pdev, + (page_address(sglist->page) + sglist->offset), + sglist->length, direction); if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) return 0; sglist->dma_length = sglist->length; @@ -635,7 +634,7 @@ bad_no_ctx: } /* Unmap a set of streaming mode DMA translations. */ -void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +static void pci_4u_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -695,7 +694,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. */ -void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +static void pci_4u_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -735,7 +734,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. */ -void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +static void pci_4u_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { struct pcidev_cookie *pcp; struct pci_iommu *iommu; @@ -776,6 +775,17 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i spin_unlock_irqrestore(&iommu->lock, flags); } +struct pci_iommu_ops pci_sun4u_iommu_ops = { + .alloc_consistent = pci_4u_alloc_consistent, + .free_consistent = pci_4u_free_consistent, + .map_single = pci_4u_map_single, + .unmap_single = pci_4u_unmap_single, + .map_sg = pci_4u_map_sg, + .unmap_sg = pci_4u_unmap_sg, + .dma_sync_single_for_cpu = pci_4u_dma_sync_single_for_cpu, + .dma_sync_sg_for_cpu = pci_4u_dma_sync_sg_for_cpu, +}; + static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit) { struct pci_dev *ali_isa_bridge; diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index c03ed5f..d17878b 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -286,17 +286,17 @@ static unsigned char psycho_pil_table[] = { /*0x14*/0, 0, 0, 0, /* PCI B slot 1 Int A, B, C, D */ /*0x18*/0, 0, 0, 0, /* PCI B slot 2 Int A, B, C, D */ /*0x1c*/0, 0, 0, 0, /* PCI B slot 3 Int A, B, C, D */ -/*0x20*/4, /* SCSI */ +/*0x20*/5, /* SCSI */ /*0x21*/5, /* Ethernet */ /*0x22*/8, /* Parallel Port */ /*0x23*/13, /* Audio Record */ /*0x24*/14, /* Audio Playback */ /*0x25*/15, /* PowerFail */ -/*0x26*/4, /* second SCSI */ +/*0x26*/5, /* second SCSI */ /*0x27*/11, /* Floppy */ -/*0x28*/4, /* Spare Hardware */ +/*0x28*/5, /* Spare Hardware */ /*0x29*/9, /* Keyboard */ -/*0x2a*/4, /* Mouse */ +/*0x2a*/5, /* Mouse */ /*0x2b*/12, /* Serial */ /*0x2c*/10, /* Timer 0 */ /*0x2d*/11, /* Timer 1 */ @@ -313,11 +313,11 @@ static int psycho_ino_to_pil(struct pci_dev *pdev, unsigned int ino) ret = psycho_pil_table[ino]; if (ret == 0 && pdev == NULL) { - ret = 4; + ret = 5; } else if (ret == 0) { switch ((pdev->class >> 16) & 0xff) { case PCI_BASE_CLASS_STORAGE: - ret = 4; + ret = 5; break; case PCI_BASE_CLASS_NETWORK: @@ -336,7 +336,7 @@ static int psycho_ino_to_pil(struct pci_dev *pdev, unsigned int ino) break; default: - ret = 4; + ret = 5; break; }; } @@ -1164,7 +1164,7 @@ static void pbm_config_busmastering(struct pci_pbm_info *pbm) static void pbm_scan_bus(struct pci_controller_info *p, struct pci_pbm_info *pbm) { - struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) { prom_printf("PSYCHO: Critical allocation failure.\n"); @@ -1172,7 +1172,6 @@ static void pbm_scan_bus(struct pci_controller_info *p, } /* All we care about is the PBM. */ - memset(cookie, 0, sizeof(*cookie)); cookie->pbm = pbm; pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, @@ -1465,18 +1464,16 @@ void psycho_init(int node, char *model_name) } } - p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); + p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); if (!p) { prom_printf("PSYCHO: Fatal memory allocation error.\n"); prom_halt(); } - memset(p, 0, sizeof(*p)); - iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC); if (!iommu) { prom_printf("PSYCHO: Fatal memory allocation error.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_A.iommu = p->pbm_B.iommu = iommu; p->next = pci_controller_root; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index da8e136..f67bb7f 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -533,17 +533,17 @@ static unsigned char sabre_pil_table[] = { /*0x14*/0, 0, 0, 0, /* PCI B slot 1 Int A, B, C, D */ /*0x18*/0, 0, 0, 0, /* PCI B slot 2 Int A, B, C, D */ /*0x1c*/0, 0, 0, 0, /* PCI B slot 3 Int A, B, C, D */ -/*0x20*/4, /* SCSI */ +/*0x20*/5, /* SCSI */ /*0x21*/5, /* Ethernet */ /*0x22*/8, /* Parallel Port */ /*0x23*/13, /* Audio Record */ /*0x24*/14, /* Audio Playback */ /*0x25*/15, /* PowerFail */ -/*0x26*/4, /* second SCSI */ +/*0x26*/5, /* second SCSI */ /*0x27*/11, /* Floppy */ -/*0x28*/4, /* Spare Hardware */ +/*0x28*/5, /* Spare Hardware */ /*0x29*/9, /* Keyboard */ -/*0x2a*/4, /* Mouse */ +/*0x2a*/5, /* Mouse */ /*0x2b*/12, /* Serial */ /*0x2c*/10, /* Timer 0 */ /*0x2d*/11, /* Timer 1 */ @@ -565,11 +565,11 @@ static int sabre_ino_to_pil(struct pci_dev *pdev, unsigned int ino) ret = sabre_pil_table[ino]; if (ret == 0 && pdev == NULL) { - ret = 4; + ret = 5; } else if (ret == 0) { switch ((pdev->class >> 16) & 0xff) { case PCI_BASE_CLASS_STORAGE: - ret = 4; + ret = 5; break; case PCI_BASE_CLASS_NETWORK: @@ -588,7 +588,7 @@ static int sabre_ino_to_pil(struct pci_dev *pdev, unsigned int ino) break; default: - ret = 4; + ret = 5; break; }; } @@ -1167,7 +1167,7 @@ static void apb_init(struct pci_controller_info *p, struct pci_bus *sabre_bus) static struct pcidev_cookie *alloc_bridge_cookie(struct pci_pbm_info *pbm) { - struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) { prom_printf("SABRE: Critical allocation failure.\n"); @@ -1175,7 +1175,6 @@ static struct pcidev_cookie *alloc_bridge_cookie(struct pci_pbm_info *pbm) } /* All we care about is the PBM. */ - memset(cookie, 0, sizeof(*cookie)); cookie->pbm = pbm; return cookie; @@ -1556,19 +1555,17 @@ void sabre_init(int pnode, char *model_name) } } - p = kmalloc(sizeof(*p), GFP_ATOMIC); + p = kzalloc(sizeof(*p), GFP_ATOMIC); if (!p) { prom_printf("SABRE: Error, kmalloc(pci_controller_info) failed.\n"); prom_halt(); } - memset(p, 0, sizeof(*p)); - iommu = kmalloc(sizeof(*iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC); if (!iommu) { prom_printf("SABRE: Error, kmalloc(pci_iommu) failed.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_A.iommu = p->pbm_B.iommu = iommu; upa_portid = prom_getintdefault(pnode, "upa-portid", 0xff); diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index d8c4e09..7fe4de0 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -243,8 +243,8 @@ static unsigned char schizo_pil_table[] = { /*0x0c*/0, 0, 0, 0, /* PCI slot 3 Int A, B, C, D */ /*0x10*/0, 0, 0, 0, /* PCI slot 4 Int A, B, C, D */ /*0x14*/0, 0, 0, 0, /* PCI slot 5 Int A, B, C, D */ -/*0x18*/4, /* SCSI */ -/*0x19*/4, /* second SCSI */ +/*0x18*/5, /* SCSI */ +/*0x19*/5, /* second SCSI */ /*0x1a*/0, /* UNKNOWN */ /*0x1b*/0, /* UNKNOWN */ /*0x1c*/8, /* Parallel */ @@ -254,7 +254,7 @@ static unsigned char schizo_pil_table[] = { /*0x20*/13, /* Audio Record */ /*0x21*/14, /* Audio Playback */ /*0x22*/12, /* Serial */ -/*0x23*/4, /* EBUS I2C */ +/*0x23*/5, /* EBUS I2C */ /*0x24*/10, /* RTC Clock */ /*0x25*/11, /* Floppy */ /*0x26*/0, /* UNKNOWN */ @@ -296,11 +296,11 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino) ret = schizo_pil_table[ino]; if (ret == 0 && pdev == NULL) { - ret = 4; + ret = 5; } else if (ret == 0) { switch ((pdev->class >> 16) & 0xff) { case PCI_BASE_CLASS_STORAGE: - ret = 4; + ret = 5; break; case PCI_BASE_CLASS_NETWORK: @@ -319,7 +319,7 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino) break; default: - ret = 4; + ret = 5; break; }; } @@ -1525,7 +1525,7 @@ static void pbm_config_busmastering(struct pci_pbm_info *pbm) static void pbm_scan_bus(struct pci_controller_info *p, struct pci_pbm_info *pbm) { - struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) { prom_printf("%s: Critical allocation failure.\n", pbm->name); @@ -1533,7 +1533,6 @@ static void pbm_scan_bus(struct pci_controller_info *p, } /* All we care about is the PBM. */ - memset(cookie, 0, sizeof(*cookie)); cookie->pbm = pbm; pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, @@ -2120,27 +2119,24 @@ static void __schizo_init(int node, char *model_name, int chip_type) } } - p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); + p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); if (!p) { prom_printf("SCHIZO: Fatal memory allocation error.\n"); prom_halt(); } - memset(p, 0, sizeof(*p)); - iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC); if (!iommu) { prom_printf("SCHIZO: Fatal memory allocation error.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_A.iommu = iommu; - iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC); if (!iommu) { prom_printf("SCHIZO: Fatal memory allocation error.\n"); prom_halt(); } - memset(iommu, 0, sizeof(*iommu)); p->pbm_B.iommu = iommu; p->next = pci_controller_root; diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c new file mode 100644 index 0000000..9372d4f --- /dev/null +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -0,0 +1,1147 @@ +/* pci_sun4v.c: SUN4V specific PCI controller support. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> + +#include <asm/pbm.h> +#include <asm/iommu.h> +#include <asm/irq.h> +#include <asm/upa.h> +#include <asm/pstate.h> +#include <asm/oplib.h> +#include <asm/hypervisor.h> + +#include "pci_impl.h" +#include "iommu_common.h" + +#include "pci_sun4v.h" + +#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) + +struct pci_iommu_batch { + struct pci_dev *pdev; /* Device mapping is for. */ + unsigned long prot; /* IOMMU page protections */ + unsigned long entry; /* Index into IOTSB. */ + u64 *pglist; /* List of physical pages */ + unsigned long npages; /* Number of pages in list. */ +}; + +static DEFINE_PER_CPU(struct pci_iommu_batch, pci_iommu_batch); + +/* Interrupts must be disabled. */ +static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry) +{ + struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch); + + p->pdev = pdev; + p->prot = prot; + p->entry = entry; + p->npages = 0; +} + +/* Interrupts must be disabled. */ +static long pci_iommu_batch_flush(struct pci_iommu_batch *p) +{ + struct pcidev_cookie *pcp = p->pdev->sysdata; + unsigned long devhandle = pcp->pbm->devhandle; + unsigned long prot = p->prot; + unsigned long entry = p->entry; + u64 *pglist = p->pglist; + unsigned long npages = p->npages; + + while (npages != 0) { + long num; + + num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist)); + if (unlikely(num < 0)) { + if (printk_ratelimit()) + printk("pci_iommu_batch_flush: IOMMU map of " + "[%08lx:%08lx:%lx:%lx:%lx] failed with " + "status %ld\n", + devhandle, HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist), num); + return -1; + } + + entry += num; + npages -= num; + pglist += num; + } + + p->entry = entry; + p->npages = 0; + + return 0; +} + +/* Interrupts must be disabled. */ +static inline long pci_iommu_batch_add(u64 phys_page) +{ + struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch); + + BUG_ON(p->npages >= PGLIST_NENTS); + + p->pglist[p->npages++] = phys_page; + if (p->npages == PGLIST_NENTS) + return pci_iommu_batch_flush(p); + + return 0; +} + +/* Interrupts must be disabled. */ +static inline long pci_iommu_batch_end(void) +{ + struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch); + + BUG_ON(p->npages >= PGLIST_NENTS); + + return pci_iommu_batch_flush(p); +} + +static long pci_arena_alloc(struct pci_iommu_arena *arena, unsigned long npages) +{ + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages) +{ + unsigned long i; + + for (i = base; i < (base + npages); i++) + __clear_bit(i, arena->map); +} + +static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, order, first_page, npages, n; + void *ret; + long entry; + + size = IO_PAGE_ALIGN(size); + order = get_order(size); + if (unlikely(order >= MAX_ORDER)) + return NULL; + + npages = size >> IO_PAGE_SHIFT; + + first_page = __get_free_pages(GFP_ATOMIC, order); + if (unlikely(first_page == 0UL)) + return NULL; + + memset((char *)first_page, 0, PAGE_SIZE << order); + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + + spin_lock_irqsave(&iommu->lock, flags); + entry = pci_arena_alloc(&iommu->arena, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry < 0L)) + goto arena_alloc_fail; + + *dma_addrp = (iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT)); + ret = (void *) first_page; + first_page = __pa(first_page); + + local_irq_save(flags); + + pci_iommu_batch_start(pdev, + (HV_PCI_MAP_ATTR_READ | + HV_PCI_MAP_ATTR_WRITE), + entry); + + for (n = 0; n < npages; n++) { + long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE)); + if (unlikely(err < 0L)) + goto iommu_map_fail; + } + + if (unlikely(pci_iommu_batch_end() < 0L)) + goto iommu_map_fail; + + local_irq_restore(flags); + + return ret; + +iommu_map_fail: + /* Interrupts are disabled. */ + spin_lock(&iommu->lock); + pci_arena_free(&iommu->arena, entry, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + +arena_alloc_fail: + free_pages(first_page, order); + return NULL; +} + +static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, order, npages, entry; + u32 devhandle; + + npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + devhandle = pcp->pbm->devhandle; + entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + spin_lock_irqsave(&iommu->lock, flags); + + pci_arena_free(&iommu->arena, entry, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); + + order = get_order(size); + if (order < 10) + free_pages((unsigned long)cpu, order); +} + +static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, npages, oaddr; + unsigned long i, base_paddr; + u32 bus_addr, ret; + unsigned long prot; + long entry; + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + + if (unlikely(direction == PCI_DMA_NONE)) + goto bad; + + oaddr = (unsigned long)ptr; + npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + + spin_lock_irqsave(&iommu->lock, flags); + entry = pci_arena_alloc(&iommu->arena, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry < 0L)) + goto bad; + + bus_addr = (iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT)); + ret = bus_addr | (oaddr & ~IO_PAGE_MASK); + base_paddr = __pa(oaddr & IO_PAGE_MASK); + prot = HV_PCI_MAP_ATTR_READ; + if (direction != PCI_DMA_TODEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; + + local_irq_save(flags); + + pci_iommu_batch_start(pdev, prot, entry); + + for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) { + long err = pci_iommu_batch_add(base_paddr); + if (unlikely(err < 0L)) + goto iommu_map_fail; + } + if (unlikely(pci_iommu_batch_end() < 0L)) + goto iommu_map_fail; + + local_irq_restore(flags); + + return ret; + +bad: + if (printk_ratelimit()) + WARN_ON(1); + return PCI_DMA_ERROR_CODE; + +iommu_map_fail: + /* Interrupts are disabled. */ + spin_lock(&iommu->lock); + pci_arena_free(&iommu->arena, entry, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + return PCI_DMA_ERROR_CODE; +} + +static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, npages; + long entry; + u32 devhandle; + + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + devhandle = pcp->pbm->devhandle; + + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + bus_addr &= IO_PAGE_MASK; + + spin_lock_irqsave(&iommu->lock, flags); + + entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; + pci_arena_free(&iommu->arena, entry, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +#define SG_ENT_PHYS_ADDRESS(SG) \ + (__pa(page_address((SG)->page)) + (SG)->offset) + +static inline long fill_sg(long entry, struct pci_dev *pdev, + struct scatterlist *sg, + int nused, int nelems, unsigned long prot) +{ + struct scatterlist *dma_sg = sg; + struct scatterlist *sg_end = sg + nelems; + unsigned long flags; + int i; + + local_irq_save(flags); + + pci_iommu_batch_start(pdev, prot, entry); + + for (i = 0; i < nused; i++) { + unsigned long pteval = ~0UL; + u32 dma_npages; + + dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) + + dma_sg->dma_length + + ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT; + do { + unsigned long offset; + signed int len; + + /* If we are here, we know we have at least one + * more page to map. So walk forward until we + * hit a page crossing, and begin creating new + * mappings from that spot. + */ + for (;;) { + unsigned long tmp; + + tmp = SG_ENT_PHYS_ADDRESS(sg); + len = sg->length; + if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) { + pteval = tmp & IO_PAGE_MASK; + offset = tmp & (IO_PAGE_SIZE - 1UL); + break; + } + if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) { + pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK; + offset = 0UL; + len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL))); + break; + } + sg++; + } + + pteval = (pteval & IOPTE_PAGE); + while (len > 0) { + long err; + + err = pci_iommu_batch_add(pteval); + if (unlikely(err < 0L)) + goto iommu_map_failed; + + pteval += IO_PAGE_SIZE; + len -= (IO_PAGE_SIZE - offset); + offset = 0; + dma_npages--; + } + + pteval = (pteval & IOPTE_PAGE) + len; + sg++; + + /* Skip over any tail mappings we've fully mapped, + * adjusting pteval along the way. Stop when we + * detect a page crossing event. + */ + while (sg < sg_end && + (pteval << (64 - IO_PAGE_SHIFT)) != 0UL && + (pteval == SG_ENT_PHYS_ADDRESS(sg)) && + ((pteval ^ + (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { + pteval += sg->length; + sg++; + } + if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL) + pteval = ~0UL; + } while (dma_npages != 0); + dma_sg++; + } + + if (unlikely(pci_iommu_batch_end() < 0L)) + goto iommu_map_failed; + + local_irq_restore(flags); + return 0; + +iommu_map_failed: + local_irq_restore(flags); + return -1L; +} + +static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, npages, prot; + u32 dma_base; + struct scatterlist *sgtmp; + long entry, err; + int used; + + /* Fast path single entry scatterlists. */ + if (nelems == 1) { + sglist->dma_address = + pci_4v_map_single(pdev, + (page_address(sglist->page) + sglist->offset), + sglist->length, direction); + if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) + return 0; + sglist->dma_length = sglist->length; + return 1; + } + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + + if (unlikely(direction == PCI_DMA_NONE)) + goto bad; + + /* Step 1: Prepare scatter list. */ + npages = prepare_sg(sglist, nelems); + + /* Step 2: Allocate a cluster and context, if necessary. */ + spin_lock_irqsave(&iommu->lock, flags); + entry = pci_arena_alloc(&iommu->arena, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry < 0L)) + goto bad; + + dma_base = iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT); + + /* Step 3: Normalize DMA addresses. */ + used = nelems; + + sgtmp = sglist; + while (used && sgtmp->dma_length) { + sgtmp->dma_address += dma_base; + sgtmp++; + used--; + } + used = nelems - used; + + /* Step 4: Create the mappings. */ + prot = HV_PCI_MAP_ATTR_READ; + if (direction != PCI_DMA_TODEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; + + err = fill_sg(entry, pdev, sglist, used, nelems, prot); + if (unlikely(err < 0L)) + goto iommu_map_failed; + + return used; + +bad: + if (printk_ratelimit()) + WARN_ON(1); + return 0; + +iommu_map_failed: + spin_lock_irqsave(&iommu->lock, flags); + pci_arena_free(&iommu->arena, entry, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +{ + struct pcidev_cookie *pcp; + struct pci_iommu *iommu; + unsigned long flags, i, npages; + long entry; + u32 devhandle, bus_addr; + + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + } + + pcp = pdev->sysdata; + iommu = pcp->pbm->iommu; + devhandle = pcp->pbm->devhandle; + + bus_addr = sglist->dma_address & IO_PAGE_MASK; + + for (i = 1; i < nelems; i++) + if (sglist[i].dma_length == 0) + break; + i--; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + bus_addr) >> IO_PAGE_SHIFT; + + entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + spin_lock_irqsave(&iommu->lock, flags); + + pci_arena_free(&iommu->arena, entry, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) +{ + /* Nothing to do... */ +} + +static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) +{ + /* Nothing to do... */ +} + +struct pci_iommu_ops pci_sun4v_iommu_ops = { + .alloc_consistent = pci_4v_alloc_consistent, + .free_consistent = pci_4v_free_consistent, + .map_single = pci_4v_map_single, + .unmap_single = pci_4v_unmap_single, + .map_sg = pci_4v_map_sg, + .unmap_sg = pci_4v_unmap_sg, + .dma_sync_single_for_cpu = pci_4v_dma_sync_single_for_cpu, + .dma_sync_sg_for_cpu = pci_4v_dma_sync_sg_for_cpu, +}; + +/* SUN4V PCI configuration space accessors. */ + +static inline int pci_sun4v_out_of_range(struct pci_pbm_info *pbm, unsigned int bus, unsigned int device, unsigned int func) +{ + if (bus == pbm->pci_first_busno) { + if (device == 0 && func == 0) + return 0; + return 1; + } + + if (bus < pbm->pci_first_busno || + bus > pbm->pci_last_busno) + return 1; + return 0; +} + +static int pci_sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 *value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + u32 devhandle = pbm->devhandle; + unsigned int bus = bus_dev->number; + unsigned int device = PCI_SLOT(devfn); + unsigned int func = PCI_FUNC(devfn); + unsigned long ret; + + if (pci_sun4v_out_of_range(pbm, bus, device, func)) { + ret = ~0UL; + } else { + ret = pci_sun4v_config_get(devhandle, + HV_PCI_DEVICE_BUILD(bus, device, func), + where, size); +#if 0 + printk("rcfg: [%x:%x:%x:%d]=[%lx]\n", + devhandle, HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, ret); +#endif + } + switch (size) { + case 1: + *value = ret & 0xff; + break; + case 2: + *value = ret & 0xffff; + break; + case 4: + *value = ret & 0xffffffff; + break; + }; + + + return PCIBIOS_SUCCESSFUL; +} + +static int pci_sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + u32 devhandle = pbm->devhandle; + unsigned int bus = bus_dev->number; + unsigned int device = PCI_SLOT(devfn); + unsigned int func = PCI_FUNC(devfn); + unsigned long ret; + + if (pci_sun4v_out_of_range(pbm, bus, device, func)) { + /* Do nothing. */ + } else { + ret = pci_sun4v_config_put(devhandle, + HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, value); +#if 0 + printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n", + devhandle, HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, value, ret); +#endif + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops pci_sun4v_ops = { + .read = pci_sun4v_read_pci_cfg, + .write = pci_sun4v_write_pci_cfg, +}; + + +static void pbm_scan_bus(struct pci_controller_info *p, + struct pci_pbm_info *pbm) +{ + struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL); + + if (!cookie) { + prom_printf("%s: Critical allocation failure.\n", pbm->name); + prom_halt(); + } + + /* All we care about is the PBM. */ + memset(cookie, 0, sizeof(*cookie)); + cookie->pbm = pbm; + + pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, p->pci_ops, pbm); +#if 0 + pci_fixup_host_bridge_self(pbm->pci_bus); + pbm->pci_bus->self->sysdata = cookie; +#endif + pci_fill_in_pbm_cookies(pbm->pci_bus, pbm, + pbm->prom_node); + pci_record_assignments(pbm, pbm->pci_bus); + pci_assign_unassigned(pbm, pbm->pci_bus); + pci_fixup_irq(pbm, pbm->pci_bus); + pci_determine_66mhz_disposition(pbm, pbm->pci_bus); + pci_setup_busmastering(pbm, pbm->pci_bus); +} + +static void pci_sun4v_scan_bus(struct pci_controller_info *p) +{ + if (p->pbm_A.prom_node) { + p->pbm_A.is_66mhz_capable = + prom_getbool(p->pbm_A.prom_node, "66mhz-capable"); + + pbm_scan_bus(p, &p->pbm_A); + } + if (p->pbm_B.prom_node) { + p->pbm_B.is_66mhz_capable = + prom_getbool(p->pbm_B.prom_node, "66mhz-capable"); + + pbm_scan_bus(p, &p->pbm_B); + } + + /* XXX register error interrupt handlers XXX */ +} + +static unsigned int pci_sun4v_irq_build(struct pci_pbm_info *pbm, + struct pci_dev *pdev, + unsigned int devino) +{ + u32 devhandle = pbm->devhandle; + int pil; + + pil = 5; + if (pdev) { + switch ((pdev->class >> 16) & 0xff) { + case PCI_BASE_CLASS_STORAGE: + pil = 5; + break; + + case PCI_BASE_CLASS_NETWORK: + pil = 6; + break; + + case PCI_BASE_CLASS_DISPLAY: + pil = 9; + break; + + case PCI_BASE_CLASS_MULTIMEDIA: + case PCI_BASE_CLASS_MEMORY: + case PCI_BASE_CLASS_BRIDGE: + case PCI_BASE_CLASS_SERIAL: + pil = 10; + break; + + default: + pil = 5; + break; + }; + } + BUG_ON(PIL_RESERVED(pil)); + + return sun4v_build_irq(devhandle, devino, pil, IBF_PCI); +} + +static void pci_sun4v_base_address_update(struct pci_dev *pdev, int resource) +{ + struct pcidev_cookie *pcp = pdev->sysdata; + struct pci_pbm_info *pbm = pcp->pbm; + struct resource *res, *root; + u32 reg; + int where, size, is_64bit; + + res = &pdev->resource[resource]; + if (resource < 6) { + where = PCI_BASE_ADDRESS_0 + (resource * 4); + } else if (resource == PCI_ROM_RESOURCE) { + where = pdev->rom_base_reg; + } else { + /* Somebody might have asked allocation of a non-standard resource */ + return; + } + + /* XXX 64-bit MEM handling is not %100 correct... XXX */ + is_64bit = 0; + if (res->flags & IORESOURCE_IO) + root = &pbm->io_space; + else { + root = &pbm->mem_space; + if ((res->flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) + == PCI_BASE_ADDRESS_MEM_TYPE_64) + is_64bit = 1; + } + + size = res->end - res->start; + pci_read_config_dword(pdev, where, ®); + reg = ((reg & size) | + (((u32)(res->start - root->start)) & ~size)); + if (resource == PCI_ROM_RESOURCE) { + reg |= PCI_ROM_ADDRESS_ENABLE; + res->flags |= IORESOURCE_ROM_ENABLE; + } + pci_write_config_dword(pdev, where, reg); + + /* This knows that the upper 32-bits of the address + * must be zero. Our PCI common layer enforces this. + */ + if (is_64bit) + pci_write_config_dword(pdev, where + 4, 0); +} + +static void pci_sun4v_resource_adjust(struct pci_dev *pdev, + struct resource *res, + struct resource *root) +{ + res->start += root->start; + res->end += root->start; +} + +/* Use ranges property to determine where PCI MEM, I/O, and Config + * space are for this PCI bus module. + */ +static void pci_sun4v_determine_mem_io_space(struct pci_pbm_info *pbm) +{ + int i, saw_mem, saw_io; + + saw_mem = saw_io = 0; + for (i = 0; i < pbm->num_pbm_ranges; i++) { + struct linux_prom_pci_ranges *pr = &pbm->pbm_ranges[i]; + unsigned long a; + int type; + + type = (pr->child_phys_hi >> 24) & 0x3; + a = (((unsigned long)pr->parent_phys_hi << 32UL) | + ((unsigned long)pr->parent_phys_lo << 0UL)); + + switch (type) { + case 1: + /* 16-bit IO space, 16MB */ + pbm->io_space.start = a; + pbm->io_space.end = a + ((16UL*1024UL*1024UL) - 1UL); + pbm->io_space.flags = IORESOURCE_IO; + saw_io = 1; + break; + + case 2: + /* 32-bit MEM space, 2GB */ + pbm->mem_space.start = a; + pbm->mem_space.end = a + (0x80000000UL - 1UL); + pbm->mem_space.flags = IORESOURCE_MEM; + saw_mem = 1; + break; + + case 3: + /* XXX 64-bit MEM handling XXX */ + + default: + break; + }; + } + + if (!saw_io || !saw_mem) { + prom_printf("%s: Fatal error, missing %s PBM range.\n", + pbm->name, + (!saw_io ? "IO" : "MEM")); + prom_halt(); + } + + printk("%s: PCI IO[%lx] MEM[%lx]\n", + pbm->name, + pbm->io_space.start, + pbm->mem_space.start); +} + +static void pbm_register_toplevel_resources(struct pci_controller_info *p, + struct pci_pbm_info *pbm) +{ + pbm->io_space.name = pbm->mem_space.name = pbm->name; + + request_resource(&ioport_resource, &pbm->io_space); + request_resource(&iomem_resource, &pbm->mem_space); + pci_register_legacy_regions(&pbm->io_space, + &pbm->mem_space); +} + +static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, + struct pci_iommu *iommu) +{ + struct pci_iommu_arena *arena = &iommu->arena; + unsigned long i, cnt = 0; + u32 devhandle; + + devhandle = pbm->devhandle; + for (i = 0; i < arena->limit; i++) { + unsigned long ret, io_attrs, ra; + + ret = pci_sun4v_iommu_getmap(devhandle, + HV_PCI_TSBID(0, i), + &io_attrs, &ra); + if (ret == HV_EOK) { + cnt++; + __set_bit(i, arena->map); + } + } + + return cnt; +} + +static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm) +{ + struct pci_iommu *iommu = pbm->iommu; + unsigned long num_tsb_entries, sz; + u32 vdma[2], dma_mask, dma_offset; + int err, tsbsize; + + err = prom_getproperty(pbm->prom_node, "virtual-dma", + (char *)&vdma[0], sizeof(vdma)); + if (err == 0 || err == -1) { + /* No property, use default values. */ + vdma[0] = 0x80000000; + vdma[1] = 0x80000000; + } + + dma_mask = vdma[0]; + switch (vdma[1]) { + case 0x20000000: + dma_mask |= 0x1fffffff; + tsbsize = 64; + break; + + case 0x40000000: + dma_mask |= 0x3fffffff; + tsbsize = 128; + break; + + case 0x80000000: + dma_mask |= 0x7fffffff; + tsbsize = 256; + break; + + default: + prom_printf("PCI-SUN4V: strange virtual-dma size.\n"); + prom_halt(); + }; + + tsbsize *= (8 * 1024); + + num_tsb_entries = tsbsize / sizeof(iopte_t); + + dma_offset = vdma[0]; + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_mask; + + /* Allocate and initialize the free area map. */ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); + prom_halt(); + } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; + + sz = probe_existing_entries(pbm, iommu); + + printk("%s: TSB entries [%lu], existing mapings [%lu]\n", + pbm->name, num_tsb_entries, sz); +} + +static void pci_sun4v_get_bus_range(struct pci_pbm_info *pbm) +{ + unsigned int busrange[2]; + int prom_node = pbm->prom_node; + int err; + + err = prom_getproperty(prom_node, "bus-range", + (char *)&busrange[0], + sizeof(busrange)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no bus-range.\n", pbm->name); + prom_halt(); + } + + pbm->pci_first_busno = busrange[0]; + pbm->pci_last_busno = busrange[1]; + +} + +static void pci_sun4v_pbm_init(struct pci_controller_info *p, int prom_node, u32 devhandle) +{ + struct pci_pbm_info *pbm; + int err, i; + + if (devhandle & 0x40) + pbm = &p->pbm_B; + else + pbm = &p->pbm_A; + + pbm->parent = p; + pbm->prom_node = prom_node; + pbm->pci_first_slot = 1; + + pbm->devhandle = devhandle; + + sprintf(pbm->name, "SUN4V-PCI%d PBM%c", + p->index, (pbm == &p->pbm_A ? 'A' : 'B')); + + printk("%s: devhandle[%x] prom_node[%x:%x]\n", + pbm->name, pbm->devhandle, + pbm->prom_node, prom_getchild(pbm->prom_node)); + + prom_getstring(prom_node, "name", + pbm->prom_name, sizeof(pbm->prom_name)); + + err = prom_getproperty(prom_node, "ranges", + (char *) pbm->pbm_ranges, + sizeof(pbm->pbm_ranges)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no ranges property.\n", + pbm->name); + prom_halt(); + } + + pbm->num_pbm_ranges = + (err / sizeof(struct linux_prom_pci_ranges)); + + /* Mask out the top 8 bits of the ranges, leaving the real + * physical address. + */ + for (i = 0; i < pbm->num_pbm_ranges; i++) + pbm->pbm_ranges[i].parent_phys_hi &= 0x0fffffff; + + pci_sun4v_determine_mem_io_space(pbm); + pbm_register_toplevel_resources(p, pbm); + + err = prom_getproperty(prom_node, "interrupt-map", + (char *)pbm->pbm_intmap, + sizeof(pbm->pbm_intmap)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no interrupt-map property.\n", + pbm->name); + prom_halt(); + } + + pbm->num_pbm_intmap = (err / sizeof(struct linux_prom_pci_intmap)); + err = prom_getproperty(prom_node, "interrupt-map-mask", + (char *)&pbm->pbm_intmask, + sizeof(pbm->pbm_intmask)); + if (err == 0 || err == -1) { + prom_printf("%s: Fatal error, no interrupt-map-mask.\n", + pbm->name); + prom_halt(); + } + + pci_sun4v_get_bus_range(pbm); + pci_sun4v_iommu_init(pbm); +} + +void sun4v_pci_init(int node, char *model_name) +{ + struct pci_controller_info *p; + struct pci_iommu *iommu; + struct linux_prom64_registers regs; + u32 devhandle; + int i; + + prom_getproperty(node, "reg", (char *)®s, sizeof(regs)); + devhandle = (regs.phys_addr >> 32UL) & 0x0fffffff; + + for (p = pci_controller_root; p; p = p->next) { + struct pci_pbm_info *pbm; + + if (p->pbm_A.prom_node && p->pbm_B.prom_node) + continue; + + pbm = (p->pbm_A.prom_node ? + &p->pbm_A : + &p->pbm_B); + + if (pbm->devhandle == (devhandle ^ 0x40)) { + pci_sun4v_pbm_init(p, node, devhandle); + return; + } + } + + for_each_cpu(i) { + unsigned long page = get_zeroed_page(GFP_ATOMIC); + + if (!page) + goto fatal_memory_error; + + per_cpu(pci_iommu_batch, i).pglist = (u64 *) page; + } + + p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC); + if (!p) + goto fatal_memory_error; + + memset(p, 0, sizeof(*p)); + + iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + if (!iommu) + goto fatal_memory_error; + + memset(iommu, 0, sizeof(*iommu)); + p->pbm_A.iommu = iommu; + + iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC); + if (!iommu) + goto fatal_memory_error; + + memset(iommu, 0, sizeof(*iommu)); + p->pbm_B.iommu = iommu; + + p->next = pci_controller_root; + pci_controller_root = p; + + p->index = pci_num_controllers++; + p->pbms_same_domain = 0; + + p->scan_bus = pci_sun4v_scan_bus; + p->irq_build = pci_sun4v_irq_build; + p->base_address_update = pci_sun4v_base_address_update; + p->resource_adjust = pci_sun4v_resource_adjust; + p->pci_ops = &pci_sun4v_ops; + + /* Like PSYCHO and SCHIZO we have a 2GB aligned area + * for memory space. + */ + pci_memspace_mask = 0x7fffffffUL; + + pci_sun4v_pbm_init(p, node, devhandle); + return; + +fatal_memory_error: + prom_printf("SUN4V_PCI: Fatal memory allocation error.\n"); + prom_halt(); +} diff --git a/arch/sparc64/kernel/pci_sun4v.h b/arch/sparc64/kernel/pci_sun4v.h new file mode 100644 index 0000000..884d25f --- /dev/null +++ b/arch/sparc64/kernel/pci_sun4v.h @@ -0,0 +1,31 @@ +/* pci_sun4v.h: SUN4V specific PCI controller support. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#ifndef _PCI_SUN4V_H +#define _PCI_SUN4V_H + +extern long pci_sun4v_iommu_map(unsigned long devhandle, + unsigned long tsbid, + unsigned long num_ttes, + unsigned long io_attributes, + unsigned long io_page_list_pa); +extern unsigned long pci_sun4v_iommu_demap(unsigned long devhandle, + unsigned long tsbid, + unsigned long num_ttes); +extern unsigned long pci_sun4v_iommu_getmap(unsigned long devhandle, + unsigned long tsbid, + unsigned long *io_attributes, + unsigned long *real_address); +extern unsigned long pci_sun4v_config_get(unsigned long devhandle, + unsigned long pci_device, + unsigned long config_offset, + unsigned long size); +extern int pci_sun4v_config_put(unsigned long devhandle, + unsigned long pci_device, + unsigned long config_offset, + unsigned long size, + unsigned long data); + +#endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc64/kernel/pci_sun4v_asm.S b/arch/sparc64/kernel/pci_sun4v_asm.S new file mode 100644 index 0000000..6604fdb --- /dev/null +++ b/arch/sparc64/kernel/pci_sun4v_asm.S @@ -0,0 +1,95 @@ +/* pci_sun4v_asm: Hypervisor calls for PCI support. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#include <asm/hypervisor.h> + + /* %o0: devhandle + * %o1: tsbid + * %o2: num ttes + * %o3: io_attributes + * %o4: io_page_list phys address + * + * returns %o0: -status if status was non-zero, else + * %o0: num pages mapped + */ + .globl pci_sun4v_iommu_map +pci_sun4v_iommu_map: + mov %o5, %g1 + mov HV_FAST_PCI_IOMMU_MAP, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 1f + sub %g0, %o0, %o0 + mov %o1, %o0 +1: retl + nop + + /* %o0: devhandle + * %o1: tsbid + * %o2: num ttes + * + * returns %o0: num ttes demapped + */ + .globl pci_sun4v_iommu_demap +pci_sun4v_iommu_demap: + mov HV_FAST_PCI_IOMMU_DEMAP, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 + + /* %o0: devhandle + * %o1: tsbid + * %o2: &io_attributes + * %o3: &real_address + * + * returns %o0: status + */ + .globl pci_sun4v_iommu_getmap +pci_sun4v_iommu_getmap: + mov %o2, %o4 + mov HV_FAST_PCI_IOMMU_GETMAP, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + stx %o2, [%o3] + retl + mov %o0, %o0 + + /* %o0: devhandle + * %o1: pci_device + * %o2: pci_config_offset + * %o3: size + * + * returns %o0: data + * + * If there is an error, the data will be returned + * as all 1's. + */ + .globl pci_sun4v_config_get +pci_sun4v_config_get: + mov HV_FAST_PCI_CONFIG_GET, %o5 + ta HV_FAST_TRAP + brnz,a,pn %o1, 1f + mov -1, %o2 +1: retl + mov %o2, %o0 + + /* %o0: devhandle + * %o1: pci_device + * %o2: pci_config_offset + * %o3: size + * %o4: data + * + * returns %o0: status + * + * status will be zero if the operation completed + * successfully, else -1 if not + */ + .globl pci_sun4v_config_put +pci_sun4v_config_put: + mov HV_FAST_PCI_CONFIG_PUT, %o5 + ta HV_FAST_TRAP + brnz,a,pn %o1, 1f + mov -1, %o1 +1: retl + mov %o1, %o0 diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 059b0d0..1c7ca2f 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -44,83 +44,61 @@ #include <asm/fpumacro.h> #include <asm/head.h> #include <asm/cpudata.h> +#include <asm/mmu_context.h> #include <asm/unistd.h> +#include <asm/hypervisor.h> /* #define VERBOSE_SHOWREGS */ -/* - * Nothing special yet... - */ -void default_idle(void) -{ -} - -#ifndef CONFIG_SMP - -/* - * the idle loop on a Sparc... ;) - */ -void cpu_idle(void) +static void sparc64_yield(void) { - /* endless idle loop with no priority at all */ - for (;;) { - /* If current->work.need_resched is zero we should really - * setup for a system wakup event and execute a shutdown - * instruction. - * - * But this requires writing back the contents of the - * L2 cache etc. so implement this later. -DaveM - */ - while (!need_resched()) - barrier(); + if (tlb_type != hypervisor) + return; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - check_pgt_cache(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + + while (!need_resched()) { + unsigned long pstate; + + /* Disable interrupts. */ + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "andn %0, %1, %0\n\t" + "wrpr %0, %%g0, %%pstate" + : "=&r" (pstate) + : "i" (PSTATE_IE)); + + if (!need_resched()) + sun4v_cpu_yield(); + + /* Re-enable interrupts. */ + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "or %0, %1, %0\n\t" + "wrpr %0, %%g0, %%pstate" + : "=&r" (pstate) + : "i" (PSTATE_IE)); } -} -#else + set_thread_flag(TIF_POLLING_NRFLAG); +} -/* - * the idle loop on a UltraMultiPenguin... - * - * TIF_POLLING_NRFLAG is set because we do not sleep the cpu - * inside of the idler task, so an interrupt is not needed - * to get a clean fast response. - * - * XXX Reverify this assumption... -DaveM - * - * Addendum: We do want it to do something for the signal - * delivery case, we detect that by just seeing - * if we are trying to send this to an idler or not. - */ +/* The idle loop on sparc64. */ void cpu_idle(void) { - cpuinfo_sparc *cpuinfo = &local_cpu_data(); set_thread_flag(TIF_POLLING_NRFLAG); while(1) { if (need_resched()) { - cpuinfo->idle_volume = 0; preempt_enable_no_resched(); schedule(); preempt_disable(); - check_pgt_cache(); } - cpuinfo->idle_volume++; - - /* The store ordering is so that IRQ handlers on - * other cpus see our increasing idleness for the buddy - * redistribution algorithm. -DaveM - */ - membar_storeload_storestore(); + sparc64_yield(); } } -#endif - extern char reboot_command []; extern void (*prom_palette)(int); @@ -354,6 +332,7 @@ void show_regs(struct pt_regs *regs) extern long etrap, etraptl1; #endif __show_regs(regs); +#if 0 #ifdef CONFIG_SMP { extern void smp_report_regs(void); @@ -361,6 +340,7 @@ void show_regs(struct pt_regs *regs) smp_report_regs(); } #endif +#endif #ifdef VERBOSE_SHOWREGS if (regs->tpc >= &etrap && regs->tpc < &etraptl1 && @@ -433,30 +413,15 @@ void exit_thread(void) void flush_thread(void) { struct thread_info *t = current_thread_info(); + struct mm_struct *mm; if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); - if (t->task->mm) { - unsigned long pgd_cache = 0UL; - if (test_thread_flag(TIF_32BIT)) { - struct mm_struct *mm = t->task->mm; - pgd_t *pgd0 = &mm->pgd[0]; - pud_t *pud0 = pud_offset(pgd0, 0); + mm = t->task->mm; + if (mm) + tsb_context_switch(mm); - if (pud_none(*pud0)) { - pmd_t *page = pmd_alloc_one(mm, 0); - pud_set(pud0, page); - } - pgd_cache = get_pgd_cache(pgd0); - } - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (pgd_cache), - "r" (TSB_REG), - "i" (ASI_DMMU)); - } set_thread_wsaved(0); /* Turn off performance counters if on. */ @@ -555,6 +520,18 @@ void synchronize_user_stack(void) } } +static void stack_unaligned(unsigned long sp) +{ + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *) sp; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + void fault_in_user_windows(void) { struct thread_info *t = current_thread_info(); @@ -570,13 +547,17 @@ void fault_in_user_windows(void) flush_user_windows(); window = get_thread_wsaved(); - if (window != 0) { + if (likely(window != 0)) { window -= 1; do { unsigned long sp = (t->rwbuf_stkptrs[window] + bias); struct reg_window *rwin = &t->reg_window[window]; - if (copy_to_user((char __user *)sp, rwin, winsize)) + if (unlikely(sp & 0x7UL)) + stack_unaligned(sp); + + if (unlikely(copy_to_user((char __user *)sp, + rwin, winsize))) goto barf; } while (window--); } diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 3f9746f..eb93e9c 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -124,6 +124,9 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, { BUG_ON(len > PAGE_SIZE); + if (tlb_type == hypervisor) + return; + #ifdef DCACHE_ALIASING_POSSIBLE /* If bit 13 of the kernel address we used to access the * user page is the same as the virtual address that page diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index b80eba0..7130e86 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -223,12 +223,26 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3 ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4 ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 - mov TSB_REG, %g6 - brnz,a,pn %l3, 1f - ldxa [%g6] ASI_IMMU, %g5 -1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 + brz,pt %l3, 1f + mov %g6, %l2 + + /* Must do this before thread reg is clobbered below. */ + LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2) +1: + ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 - wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + + /* Normal globals are restored, go to trap globals. */ +661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + nop + .section .sun4v_2insn_patch, "ax" + .word 661b + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + SET_GL(1) + .previous + + mov %l2, %g6 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 @@ -252,27 +266,108 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 brnz,pn %l3, kern_rtt mov PRIMARY_CONTEXT, %l7 - ldxa [%l7 + %l7] ASI_DMMU, %l0 + +661: ldxa [%l7 + %l7] ASI_DMMU, %l0 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%l7 + %l7] ASI_MMU, %l0 + .previous + sethi %hi(sparc64_kern_pri_nuc_bits), %l1 ldx [%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1 or %l0, %l1, %l0 - stxa %l0, [%l7] ASI_DMMU - flush %g6 + +661: stxa %l0, [%l7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %l0, [%l7] ASI_MMU + .previous + + sethi %hi(KERNBASE), %l7 + flush %l7 rdpr %wstate, %l1 rdpr %otherwin, %l2 srl %l1, 3, %l1 wrpr %l2, %g0, %canrestore wrpr %l1, %g0, %wstate - wrpr %g0, %g0, %otherwin + brnz,pt %l2, user_rtt_restore + wrpr %g0, %g0, %otherwin + + ldx [%g6 + TI_FLAGS], %g3 + wr %g0, ASI_AIUP, %asi + rdpr %cwp, %g1 + andcc %g3, _TIF_32BIT, %g0 + sub %g1, 1, %g1 + bne,pt %xcc, user_rtt_fill_32bit + wrpr %g1, %cwp + ba,a,pt %xcc, user_rtt_fill_64bit + +user_rtt_fill_fixup: + rdpr %cwp, %g1 + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + + rdpr %wstate, %g2 + sll %g2, 3, %g2 + wrpr %g2, 0x0, %wstate + + /* We know %canrestore and %otherwin are both zero. */ + + sethi %hi(sparc64_kern_pri_context), %g2 + ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + sethi %hi(KERNBASE), %g1 + flush %g1 + + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + + mov %g6, %l1 + wrpr %g0, 0x0, %tl + +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + + wrpr %g0, RTRAP_PSTATE, %pstate + + mov %l1, %g6 + ldx [%g6 + TI_TASK], %g4 + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + +user_rtt_pre_restore: + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + +user_rtt_restore: restore rdpr %canrestore, %g1 wrpr %g1, 0x0, %cleanwin retry nop -kern_rtt: restore +kern_rtt: rdpr %canrestore, %g1 + brz,pn %g1, kern_rtt_fill + nop +kern_rtt_restore: + restore retry + to_kernel: #ifdef CONFIG_PREEMPT ldsw [%g6 + TI_PRE_COUNT], %l5 diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index d95a1bc..1d6ffde 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -693,11 +693,11 @@ void sbus_set_sbus64(struct sbus_dev *sdev, int bursts) /* SBUS SYSIO INO number to Sparc PIL level. */ static unsigned char sysio_ino_to_pil[] = { - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 0 */ - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 1 */ - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 2 */ - 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 3 */ - 4, /* Onboard SCSI */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 0 */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 1 */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 2 */ + 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 3 */ + 5, /* Onboard SCSI */ 5, /* Onboard Ethernet */ /*XXX*/ 8, /* Onboard BPP */ 0, /* Bogon */ diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 158bd31..7d0e67c 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -64,12 +64,6 @@ struct screen_info screen_info = { 16 /* orig-video-points */ }; -/* Typing sync at the prom prompt calls the function pointed to by - * the sync callback which I set to the following function. - * This should sync all filesystems and return, for now it just - * prints out pretty messages and returns. - */ - void (*prom_palette)(int); void (*prom_keyboard)(void); @@ -79,259 +73,6 @@ prom_console_write(struct console *con, const char *s, unsigned n) prom_write(s, n); } -static struct console prom_console = { - .name = "prom", - .write = prom_console_write, - .flags = CON_CONSDEV | CON_ENABLED, - .index = -1, -}; - -#define PROM_TRUE -1 -#define PROM_FALSE 0 - -/* Pretty sick eh? */ -int prom_callback(long *args) -{ - struct console *cons, *saved_console = NULL; - unsigned long flags; - char *cmd; - extern spinlock_t prom_entry_lock; - - if (!args) - return -1; - if (!(cmd = (char *)args[0])) - return -1; - - /* - * The callback can be invoked on the cpu that first dropped - * into prom_cmdline after taking the serial interrupt, or on - * a slave processor that was smp_captured() if the - * administrator has done a switch-cpu inside obp. In either - * case, the cpu is marked as in-interrupt. Drop IRQ locks. - */ - irq_exit(); - - /* XXX Revisit the locking here someday. This is a debugging - * XXX feature so it isnt all that critical. -DaveM - */ - local_irq_save(flags); - - spin_unlock(&prom_entry_lock); - cons = console_drivers; - while (cons) { - unregister_console(cons); - cons->flags &= ~(CON_PRINTBUFFER); - cons->next = saved_console; - saved_console = cons; - cons = console_drivers; - } - register_console(&prom_console); - if (!strcmp(cmd, "sync")) { - prom_printf("PROM `%s' command...\n", cmd); - show_free_areas(); - if (current->pid != 0) { - local_irq_enable(); - sys_sync(); - local_irq_disable(); - } - args[2] = 0; - args[args[1] + 3] = -1; - prom_printf("Returning to PROM\n"); - } else if (!strcmp(cmd, "va>tte-data")) { - unsigned long ctx, va; - unsigned long tte = 0; - long res = PROM_FALSE; - - ctx = args[3]; - va = args[4]; - if (ctx) { - /* - * Find process owning ctx, lookup mapping. - */ - struct task_struct *p; - struct mm_struct *mm = NULL; - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - pte_t pte; - - for_each_process(p) { - mm = p->mm; - if (CTX_NRBITS(mm->context) == ctx) - break; - } - if (!mm || - CTX_NRBITS(mm->context) != ctx) - goto done; - - pgdp = pgd_offset(mm, va); - if (pgd_none(*pgdp)) - goto done; - pudp = pud_offset(pgdp, va); - if (pud_none(*pudp)) - goto done; - pmdp = pmd_offset(pudp, va); - if (pmd_none(*pmdp)) - goto done; - - /* Preemption implicitly disabled by virtue of - * being called from inside OBP. - */ - ptep = pte_offset_map(pmdp, va); - pte = *ptep; - if (pte_present(pte)) { - tte = pte_val(pte); - res = PROM_TRUE; - } - pte_unmap(ptep); - goto done; - } - - if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) { - extern unsigned long sparc64_kern_pri_context; - - /* Spitfire Errata #32 workaround */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (sparc64_kern_pri_context), - "r" (PRIMARY_CONTEXT), - "i" (ASI_DMMU)); - - /* - * Locked down tlb entry. - */ - - if (tlb_type == spitfire) - tte = spitfire_get_dtlb_data(SPITFIRE_HIGHEST_LOCKED_TLBENT); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - tte = cheetah_get_ldtlb_data(CHEETAH_HIGHEST_LOCKED_TLBENT); - - res = PROM_TRUE; - goto done; - } - - if (va < PGDIR_SIZE) { - /* - * vmalloc or prom_inherited mapping. - */ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - pte_t pte; - int error; - - if ((va >= LOW_OBP_ADDRESS) && (va < HI_OBP_ADDRESS)) { - tte = prom_virt_to_phys(va, &error); - if (!error) - res = PROM_TRUE; - goto done; - } - pgdp = pgd_offset_k(va); - if (pgd_none(*pgdp)) - goto done; - pudp = pud_offset(pgdp, va); - if (pud_none(*pudp)) - goto done; - pmdp = pmd_offset(pudp, va); - if (pmd_none(*pmdp)) - goto done; - - /* Preemption implicitly disabled by virtue of - * being called from inside OBP. - */ - ptep = pte_offset_kernel(pmdp, va); - pte = *ptep; - if (pte_present(pte)) { - tte = pte_val(pte); - res = PROM_TRUE; - } - goto done; - } - - if (va < PAGE_OFFSET) { - /* - * No mappings here. - */ - goto done; - } - - if (va & (1UL << 40)) { - /* - * I/O page. - */ - - tte = (__pa(va) & _PAGE_PADDR) | - _PAGE_VALID | _PAGE_SZ4MB | - _PAGE_E | _PAGE_P | _PAGE_W; - res = PROM_TRUE; - goto done; - } - - /* - * Normal page. - */ - tte = (__pa(va) & _PAGE_PADDR) | - _PAGE_VALID | _PAGE_SZ4MB | - _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W; - res = PROM_TRUE; - - done: - if (res == PROM_TRUE) { - args[2] = 3; - args[args[1] + 3] = 0; - args[args[1] + 4] = res; - args[args[1] + 5] = tte; - } else { - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = res; - } - } else if (!strcmp(cmd, ".soft1")) { - unsigned long tte; - - tte = args[3]; - prom_printf("%lx:\"%s%s%s%s%s\" ", - (tte & _PAGE_SOFT) >> 7, - tte & _PAGE_MODIFIED ? "M" : "-", - tte & _PAGE_ACCESSED ? "A" : "-", - tte & _PAGE_READ ? "W" : "-", - tte & _PAGE_WRITE ? "R" : "-", - tte & _PAGE_PRESENT ? "P" : "-"); - - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = PROM_TRUE; - } else if (!strcmp(cmd, ".soft2")) { - unsigned long tte; - - tte = args[3]; - prom_printf("%lx ", (tte & 0x07FC000000000000UL) >> 50); - - args[2] = 2; - args[args[1] + 3] = 0; - args[args[1] + 4] = PROM_TRUE; - } else { - prom_printf("unknown PROM `%s' command...\n", cmd); - } - unregister_console(&prom_console); - while (saved_console) { - cons = saved_console; - saved_console = cons->next; - register_console(cons); - } - spin_lock(&prom_entry_lock); - local_irq_restore(flags); - - /* - * Restore in-interrupt status for a resume from obp. - */ - irq_enter(); - return 0; -} - unsigned int boot_flags = 0; #define BOOTME_DEBUG 0x1 #define BOOTME_SINGLE 0x2 @@ -479,15 +220,99 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void register_prom_callbacks(void) +static void __init per_cpu_patch(void) { - prom_setcallback(prom_callback); - prom_feval(": linux-va>tte-data 2 \" va>tte-data\" $callback drop ; " - "' linux-va>tte-data to va>tte-data"); - prom_feval(": linux-.soft1 1 \" .soft1\" $callback 2drop ; " - "' linux-.soft1 to .soft1"); - prom_feval(": linux-.soft2 1 \" .soft2\" $callback 2drop ; " - "' linux-.soft2 to .soft2"); + struct cpuid_patch_entry *p; + unsigned long ver; + int is_jbus; + + if (tlb_type == spitfire && !this_is_starfire) + return; + + is_jbus = 0; + if (tlb_type != hypervisor) { + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + is_jbus = ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID); + } + + p = &__cpuid_patch; + while (p < &__cpuid_patch_end) { + unsigned long addr = p->addr; + unsigned int *insns; + + switch (tlb_type) { + case spitfire: + insns = &p->starfire[0]; + break; + case cheetah: + case cheetah_plus: + if (is_jbus) + insns = &p->cheetah_jbus[0]; + else + insns = &p->cheetah_safari[0]; + break; + case hypervisor: + insns = &p->sun4v[0]; + break; + default: + prom_printf("Unknown cpu type, halting.\n"); + prom_halt(); + }; + + *(unsigned int *) (addr + 0) = insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + *(unsigned int *) (addr + 8) = insns[2]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 8)); + + *(unsigned int *) (addr + 12) = insns[3]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 12)); + + p++; + } +} + +static void __init sun4v_patch(void) +{ + struct sun4v_1insn_patch_entry *p1; + struct sun4v_2insn_patch_entry *p2; + + if (tlb_type != hypervisor) + return; + + p1 = &__sun4v_1insn_patch; + while (p1 < &__sun4v_1insn_patch_end) { + unsigned long addr = p1->addr; + + *(unsigned int *) (addr + 0) = p1->insn; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + p1++; + } + + p2 = &__sun4v_2insn_patch; + while (p2 < &__sun4v_2insn_patch_end) { + unsigned long addr = p2->addr; + + *(unsigned int *) (addr + 0) = p2->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = p2->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + p2++; + } } void __init setup_arch(char **cmdline_p) @@ -496,7 +321,10 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = prom_getbootargs(); strcpy(saved_command_line, *cmdline_p); - printk("ARCH: SUN4U\n"); + if (tlb_type == hypervisor) + printk("ARCH: SUN4V\n"); + else + printk("ARCH: SUN4U\n"); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; @@ -507,6 +335,13 @@ void __init setup_arch(char **cmdline_p) /* Work out if we are starfire early on */ check_if_starfire(); + /* Now we know enough to patch the get_cpuid sequences + * used by trap code. + */ + per_cpu_patch(); + + sun4v_patch(); + boot_flags_init(*cmdline_p); idprom_init(); @@ -514,7 +349,7 @@ void __init setup_arch(char **cmdline_p) if (!root_flags) root_mountflags &= ~MS_RDONLY; ROOT_DEV = old_decode_dev(root_dev); -#ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); @@ -544,6 +379,9 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); + /* Get boot processor trap_block[] setup. */ + init_cur_cpu_trap(current_thread_info()); + paging_init(); } @@ -565,6 +403,12 @@ static int __init set_preferred_console(void) serial_console = 2; } else if (idev == PROMDEV_IRSC && odev == PROMDEV_ORSC) { serial_console = 3; + } else if (idev == PROMDEV_IVCONS && odev == PROMDEV_OVCONS) { + /* sunhv_console_init() doesn't check the serial_console + * value anyways... + */ + serial_console = 4; + return add_preferred_console("ttyHV", 0, NULL); } else { prom_printf("Inconsistent console: " "input %d, output %d\n", @@ -598,9 +442,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) seq_printf(m, "cpu\t\t: %s\n" "fpu\t\t: %s\n" - "promlib\t\t: Version 3 Revision %d\n" - "prom\t\t: %d.%d.%d\n" - "type\t\t: sun4u\n" + "prom\t\t: %s\n" + "type\t\t: %s\n" "ncpus probed\t: %d\n" "ncpus active\t: %d\n" "D$ parity tl1\t: %u\n" @@ -612,10 +455,10 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) , sparc_cpu_type, sparc_fpu_type, - prom_rev, - prom_prev >> 16, - (prom_prev >> 8) & 0xff, - prom_prev & 0xff, + prom_version, + ((tlb_type == hypervisor) ? + "sun4v" : + "sun4u"), ncpus_probed, num_online_cpus(), dcache_parity_tl1_occurred, @@ -692,15 +535,11 @@ static int __init topology_init(void) while (!cpu_find_by_instance(ncpus_probed, NULL, NULL)) ncpus_probed++; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_possible(i)) { - struct cpu *p = kmalloc(sizeof(*p), GFP_KERNEL); - - if (p) { - memset(p, 0, sizeof(*p)); - register_cpu(p, i, NULL); - err = 0; - } + for_each_cpu(i) { + struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p) { + register_cpu(p, i, NULL); + err = 0; } } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 1f7ad8a..373a701 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -38,6 +38,7 @@ #include <asm/timer.h> #include <asm/starfire.h> #include <asm/tlb.h> +#include <asm/sections.h> extern void calibrate_delay(void); @@ -46,6 +47,8 @@ static unsigned char boot_cpu_id; cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = + { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; static cpumask_t smp_commenced_mask; static cpumask_t cpu_callout_map; @@ -77,7 +80,7 @@ void smp_bogo(struct seq_file *m) void __init smp_store_cpu_info(int id) { - int cpu_node; + int cpu_node, def; /* multiplier and counter set by smp_setup_percpu_timer() */ @@ -87,24 +90,32 @@ void __init smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); - cpu_data(id).pgcache_size = 0; - cpu_data(id).pte_cache[0] = NULL; - cpu_data(id).pte_cache[1] = NULL; - cpu_data(id).pgd_cache = NULL; - cpu_data(id).idle_volume = 1; - + def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024)); cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", - 16 * 1024); + def); + + def = 32; cpu_data(id).dcache_line_size = - prom_getintdefault(cpu_node, "dcache-line-size", 32); + prom_getintdefault(cpu_node, "dcache-line-size", def); + + def = 16 * 1024; cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size", - 16 * 1024); + def); + + def = 32; cpu_data(id).icache_line_size = - prom_getintdefault(cpu_node, "icache-line-size", 32); + prom_getintdefault(cpu_node, "icache-line-size", def); + + def = ((tlb_type == hypervisor) ? + (3 * 1024 * 1024) : + (4 * 1024 * 1024)); cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size", - 4 * 1024 * 1024); + def); + + def = 64; cpu_data(id).ecache_line_size = - prom_getintdefault(cpu_node, "ecache-line-size", 64); + prom_getintdefault(cpu_node, "ecache-line-size", def); + printk("CPU[%d]: Caches " "D[sz(%d):line_sz(%d)] " "I[sz(%d):line_sz(%d)] " @@ -119,27 +130,16 @@ static void smp_setup_percpu_timer(void); static volatile unsigned long callin_flag = 0; -extern void inherit_locked_prom_mappings(int save_p); - -static inline void cpu_setup_percpu_base(unsigned long cpu_id) -{ - __asm__ __volatile__("mov %0, %%g5\n\t" - "stxa %0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (__per_cpu_offset(cpu_id)), - "r" (TSB_REG), "i" (ASI_IMMU)); -} - void __init smp_callin(void) { int cpuid = hard_smp_processor_id(); - inherit_locked_prom_mappings(0); + __local_per_cpu_offset = __per_cpu_offset(cpuid); - __flush_tlb_all(); + if (tlb_type == hypervisor) + sun4v_ktsb_register(); - cpu_setup_percpu_base(cpuid); + __flush_tlb_all(); smp_setup_percpu_timer(); @@ -316,6 +316,8 @@ static void smp_synchronize_one_tick(int cpu) spin_unlock_irqrestore(&itc_sync_lock, flags); } +extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); + extern unsigned long sparc64_cpu_startup; /* The OBP cpu startup callback truncates the 3rd arg cookie to @@ -331,21 +333,31 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) unsigned long cookie = (unsigned long)(&cpu_new_thread); struct task_struct *p; - int timeout, ret, cpu_node; + int timeout, ret; p = fork_idle(cpu); callin_flag = 0; cpu_new_thread = task_thread_info(p); cpu_set(cpu, cpu_callout_map); - cpu_find_by_mid(cpu, &cpu_node); - prom_startcpu(cpu_node, entry, cookie); + if (tlb_type == hypervisor) { + /* Alloc the mondo queues, cpu will load them. */ + sun4v_init_mondo_queues(0, cpu, 1, 0); + + prom_startcpu_cpuid(cpu, entry, cookie); + } else { + int cpu_node; + + cpu_find_by_mid(cpu, &cpu_node); + prom_startcpu(cpu_node, entry, cookie); + } for (timeout = 0; timeout < 5000000; timeout++) { if (callin_flag) break; udelay(100); } + if (callin_flag) { ret = 0; } else { @@ -441,7 +453,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { u64 pstate, ver; - int nack_busy_id, is_jalapeno; + int nack_busy_id, is_jbus; if (cpus_empty(mask)) return; @@ -451,7 +463,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas * derivative processor. */ __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - is_jalapeno = ((ver >> 32) == 0x003e0016); + is_jbus = ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); @@ -476,7 +489,7 @@ retry: for_each_cpu_mask(i, mask) { u64 target = (i << 14) | 0x70; - if (!is_jalapeno) + if (!is_jbus) target |= (nack_busy_id << 24); __asm__ __volatile__( "stxa %%g0, [%0] %1\n\t" @@ -529,7 +542,7 @@ retry: for_each_cpu_mask(i, mask) { u64 check_mask; - if (is_jalapeno) + if (is_jbus) check_mask = (0x2UL << (2*i)); else check_mask = (0x2UL << @@ -544,6 +557,155 @@ retry: } } +/* Multi-cpu list version. */ +static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) +{ + struct trap_per_cpu *tb; + u16 *cpu_list; + u64 *mondo; + cpumask_t error_mask; + unsigned long flags, status; + int cnt, retries, this_cpu, prev_sent, i; + + /* We have to do this whole thing with interrupts fully disabled. + * Otherwise if we send an xcall from interrupt context it will + * corrupt both our mondo block and cpu list state. + * + * One consequence of this is that we cannot use timeout mechanisms + * that depend upon interrupts being delivered locally. So, for + * example, we cannot sample jiffies and expect it to advance. + * + * Fortunately, udelay() uses %stick/%tick so we can use that. + */ + local_irq_save(flags); + + this_cpu = smp_processor_id(); + tb = &trap_block[this_cpu]; + + mondo = __va(tb->cpu_mondo_block_pa); + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + cpu_list = __va(tb->cpu_list_pa); + + /* Setup the initial cpu list. */ + cnt = 0; + for_each_cpu_mask(i, mask) + cpu_list[cnt++] = i; + + cpus_clear(error_mask); + retries = 0; + prev_sent = 0; + do { + int forward_progress, n_sent; + + status = sun4v_cpu_mondo_send(cnt, + tb->cpu_list_pa, + tb->cpu_mondo_block_pa); + + /* HV_EOK means all cpus received the xcall, we're done. */ + if (likely(status == HV_EOK)) + break; + + /* First, see if we made any forward progress. + * + * The hypervisor indicates successful sends by setting + * cpu list entries to the value 0xffff. + */ + n_sent = 0; + for (i = 0; i < cnt; i++) { + if (likely(cpu_list[i] == 0xffff)) + n_sent++; + } + + forward_progress = 0; + if (n_sent > prev_sent) + forward_progress = 1; + + prev_sent = n_sent; + + /* If we get a HV_ECPUERROR, then one or more of the cpus + * in the list are in error state. Use the cpu_state() + * hypervisor call to find out which cpus are in error state. + */ + if (unlikely(status == HV_ECPUERROR)) { + for (i = 0; i < cnt; i++) { + long err; + u16 cpu; + + cpu = cpu_list[i]; + if (cpu == 0xffff) + continue; + + err = sun4v_cpu_state(cpu); + if (err >= 0 && + err == HV_CPU_STATE_ERROR) { + cpu_list[i] = 0xffff; + cpu_set(cpu, error_mask); + } + } + } else if (unlikely(status != HV_EWOULDBLOCK)) + goto fatal_mondo_error; + + /* Don't bother rewriting the CPU list, just leave the + * 0xffff and non-0xffff entries in there and the + * hypervisor will do the right thing. + * + * Only advance timeout state if we didn't make any + * forward progress. + */ + if (unlikely(!forward_progress)) { + if (unlikely(++retries > 10000)) + goto fatal_mondo_timeout; + + /* Delay a little bit to let other cpus catch up + * on their cpu mondo queue work. + */ + udelay(2 * cnt); + } + } while (1); + + local_irq_restore(flags); + + if (unlikely(!cpus_empty(error_mask))) + goto fatal_mondo_cpu_error; + + return; + +fatal_mondo_cpu_error: + printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " + "were in error state\n", + this_cpu); + printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu); + for_each_cpu_mask(i, error_mask) + printk("%d ", i); + printk("]\n"); + return; + +fatal_mondo_timeout: + local_irq_restore(flags); + printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " + " progress after %d retries.\n", + this_cpu, retries); + goto dump_cpu_list_and_out; + +fatal_mondo_error: + local_irq_restore(flags); + printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", + this_cpu, status); + printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " + "mondo_block_pa(%lx)\n", + this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + +dump_cpu_list_and_out: + printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); + for (i = 0; i < cnt; i++) + printk("%u ", cpu_list[i]); + printk("]\n"); +} + /* Send cross call to all processors mentioned in MASK * except self. */ @@ -557,8 +719,10 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d if (tlb_type == spitfire) spitfire_xcall_deliver(data0, data1, data2, mask); - else + else if (tlb_type == cheetah || tlb_type == cheetah_plus) cheetah_xcall_deliver(data0, data1, data2, mask); + else + hypervisor_xcall_deliver(data0, data1, data2, mask); /* NOTE: Caller runs local copy on master. */ put_cpu(); @@ -594,16 +758,13 @@ extern unsigned long xcall_call_function; * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(void (*func)(void *info), void *info, - int nonatomic, int wait) +static int smp_call_function_mask(void (*func)(void *info), void *info, + int nonatomic, int wait, cpumask_t mask) { struct call_data_struct data; - int cpus = num_online_cpus() - 1; + int cpus; long timeout; - if (!cpus) - return 0; - /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -614,9 +775,14 @@ int smp_call_function(void (*func)(void *info), void *info, spin_lock(&call_lock); + cpu_clear(smp_processor_id(), mask); + cpus = cpus_weight(mask); + if (!cpus) + goto out_unlock; + call_data = &data; - smp_cross_call(&xcall_call_function, 0, 0, 0); + smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask); /* * Wait for other cpus to complete function or at @@ -630,18 +796,25 @@ int smp_call_function(void (*func)(void *info), void *info, udelay(1); } +out_unlock: spin_unlock(&call_lock); return 0; out_timeout: spin_unlock(&call_lock); - printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n", - (long) num_online_cpus() - 1L, - (long) atomic_read(&data.finished)); + printk("XCALL: Remote cpus not responding, ncpus=%d finished=%d\n", + cpus, atomic_read(&data.finished)); return 0; } +int smp_call_function(void (*func)(void *info), void *info, + int nonatomic, int wait) +{ + return smp_call_function_mask(func, info, nonatomic, wait, + cpu_online_map); +} + void smp_call_function_client(int irq, struct pt_regs *regs) { void (*func) (void *info) = call_data->func; @@ -659,13 +832,25 @@ void smp_call_function_client(int irq, struct pt_regs *regs) } } +static void tsb_sync(void *info) +{ + struct mm_struct *mm = info; + + if (current->active_mm == mm) + tsb_context_switch(mm); +} + +void smp_tsb_sync(struct mm_struct *mm) +{ + smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask); +} + extern unsigned long xcall_flush_tlb_mm; extern unsigned long xcall_flush_tlb_pending; extern unsigned long xcall_flush_tlb_kernel_range; -extern unsigned long xcall_flush_tlb_all_spitfire; -extern unsigned long xcall_flush_tlb_all_cheetah; extern unsigned long xcall_report_regs; extern unsigned long xcall_receive_signal; +extern unsigned long xcall_new_mmu_context_version; #ifdef DCACHE_ALIASING_POSSIBLE extern unsigned long xcall_flush_dcache_page_cheetah; @@ -693,11 +878,17 @@ static __inline__ void __local_flush_dcache_page(struct page *page) void smp_flush_dcache_page_impl(struct page *page, int cpu) { cpumask_t mask = cpumask_of_cpu(cpu); - int this_cpu = get_cpu(); + int this_cpu; + + if (tlb_type == hypervisor) + return; #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes); #endif + + this_cpu = get_cpu(); + if (cpu == this_cpu) { __local_flush_dcache_page(page); } else if (cpu_online(cpu)) { @@ -713,7 +904,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) __pa(pg_addr), (u64) pg_addr, mask); - } else { + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE data0 = ((u64)&xcall_flush_dcache_page_cheetah); @@ -735,7 +926,12 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) void *pg_addr = page_address(page); cpumask_t mask = cpu_online_map; u64 data0; - int this_cpu = get_cpu(); + int this_cpu; + + if (tlb_type == hypervisor) + return; + + this_cpu = get_cpu(); cpu_clear(this_cpu, mask); @@ -752,7 +948,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) __pa(pg_addr), (u64) pg_addr, mask); - } else { + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE data0 = ((u64)&xcall_flush_dcache_page_cheetah); cheetah_xcall_deliver(data0, @@ -769,38 +965,58 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) put_cpu(); } +static void __smp_receive_signal_mask(cpumask_t mask) +{ + smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask); +} + void smp_receive_signal(int cpu) { cpumask_t mask = cpumask_of_cpu(cpu); - if (cpu_online(cpu)) { - u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff); - - if (tlb_type == spitfire) - spitfire_xcall_deliver(data0, 0, 0, mask); - else - cheetah_xcall_deliver(data0, 0, 0, mask); - } + if (cpu_online(cpu)) + __smp_receive_signal_mask(mask); } void smp_receive_signal_client(int irq, struct pt_regs *regs) { - /* Just return, rtrap takes care of the rest. */ clear_softint(1 << irq); } -void smp_report_regs(void) +void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) { - smp_cross_call(&xcall_report_regs, 0, 0, 0); + struct mm_struct *mm; + unsigned long flags; + + clear_softint(1 << irq); + + /* See if we need to allocate a new TLB context because + * the version of the one we are using is now out of date. + */ + mm = current->active_mm; + if (unlikely(!mm || (mm == &init_mm))) + return; + + spin_lock_irqsave(&mm->context.lock, flags); + + if (unlikely(!CTX_VALID(mm->context))) + get_new_mmu_context(mm); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + load_secondary_context(mm); + __flush_tlb_mm(CTX_HWBITS(mm->context), + SECONDARY_CONTEXT); } -void smp_flush_tlb_all(void) +void smp_new_mmu_context_version(void) { - if (tlb_type == spitfire) - smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0); - else - smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0); - __flush_tlb_all(); + smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); +} + +void smp_report_regs(void) +{ + smp_cross_call(&xcall_report_regs, 0, 0, 0); } /* We know that the window frames of the user have been flushed @@ -944,24 +1160,19 @@ void smp_release(void) * can service tlb flush xcalls... */ extern void prom_world(int); -extern void save_alternate_globals(unsigned long *); -extern void restore_alternate_globals(unsigned long *); + void smp_penguin_jailcell(int irq, struct pt_regs *regs) { - unsigned long global_save[24]; - clear_softint(1 << irq); preempt_disable(); __asm__ __volatile__("flushw"); - save_alternate_globals(global_save); prom_world(1); atomic_inc(&smp_capture_registry); membar_storeload_storestore(); while (penguins_are_doing_time) rmb(); - restore_alternate_globals(global_save); atomic_dec(&smp_capture_registry); prom_world(0); @@ -1082,6 +1293,8 @@ int setup_profiling_timer(unsigned int multiplier) /* Constrain the number of cpus to max_cpus. */ void __init smp_prepare_cpus(unsigned int max_cpus) { + int i; + if (num_possible_cpus() > max_cpus) { int instance, mid; @@ -1096,6 +1309,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } + for_each_cpu(i) { + if (tlb_type == hypervisor) { + int j; + + /* XXX get this mapping from machine description */ + for_each_cpu(j) { + if ((j >> 2) == (i >> 2)) + cpu_set(j, cpu_sibling_map[i]); + } + } else { + cpu_set(i, cpu_sibling_map[i]); + } + } + smp_store_cpu_info(boot_cpu_id); } @@ -1117,12 +1344,15 @@ void __init smp_setup_cpu_possible_map(void) void __devinit smp_prepare_boot_cpu(void) { - if (hard_smp_processor_id() >= NR_CPUS) { + int cpu = hard_smp_processor_id(); + + if (cpu >= NR_CPUS) { prom_printf("Serious problem, boot cpu id >= NR_CPUS\n"); prom_halt(); } - current_thread_info()->cpu = hard_smp_processor_id(); + current_thread_info()->cpu = cpu; + __local_per_cpu_offset = __per_cpu_offset(cpu); cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), phys_cpu_present_map); @@ -1139,7 +1369,11 @@ int __devinit __cpu_up(unsigned int cpu) if (!cpu_isset(cpu, cpu_online_map)) { ret = -ENODEV; } else { - smp_synchronize_one_tick(cpu); + /* On SUN4V, writes to %tick and %stick are + * not allowed. + */ + if (tlb_type != hypervisor) + smp_synchronize_one_tick(cpu); } } return ret; @@ -1183,12 +1417,9 @@ void __init setup_per_cpu_areas(void) { unsigned long goal, size, i; char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; /* Copy section for each CPU (we discard the original) */ - goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); - + goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); #ifdef CONFIG_MODULES if (goal < PERCPU_ENOUGH_ROOM) goal = PERCPU_ENOUGH_ROOM; @@ -1197,31 +1428,10 @@ void __init setup_per_cpu_areas(void) for (size = 1UL; size < goal; size <<= 1UL) __per_cpu_shift++; - /* Make sure the resulting __per_cpu_base value - * will fit in the 43-bit sign extended IMMU - * TSB register. - */ - ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE, - (unsigned long) __per_cpu_start); + ptr = alloc_bootmem(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; - if ((__per_cpu_shift < PAGE_SHIFT) || - (__per_cpu_base & ~PAGE_MASK) || - (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) { - prom_printf("PER_CPU: Invalid layout, " - "ptr[%p] shift[%lx] base[%lx]\n", - ptr, __per_cpu_shift, __per_cpu_base); - prom_halt(); - } - for (i = 0; i < NR_CPUS; i++, ptr += size) memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - - /* Finally, load in the boot cpu's base value. - * We abuse the IMMU TSB register for trap handler - * entry and exit loading of %g5. That is why it - * has to be page aligned. - */ - cpu_setup_percpu_base(hard_smp_processor_id()); } diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 3c06bfb..9914a17 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -95,9 +95,6 @@ extern int __ashrdi3(int, int); extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); -extern unsigned long phys_base; -extern unsigned long pfn_base; - extern unsigned int sys_call_table[]; extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); @@ -108,6 +105,14 @@ extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *, extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); +extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + /* Per-CPU information table */ EXPORT_PER_CPU_SYMBOL(__cpu_data); @@ -241,10 +246,6 @@ EXPORT_SYMBOL(verify_compat_iovec); #endif EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(pte_alloc_one_kernel); -#ifndef CONFIG_SMP -EXPORT_SYMBOL(pgt_quicklists); -#endif EXPORT_SYMBOL(put_fs_struct); /* math-emu wants this */ @@ -339,14 +340,10 @@ EXPORT_SYMBOL(copy_to_user_fixup); EXPORT_SYMBOL(copy_from_user_fixup); EXPORT_SYMBOL(copy_in_user_fixup); EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(__bzero_noasi); +EXPORT_SYMBOL(__clear_user); /* Various address conversion macros use this. */ -EXPORT_SYMBOL(phys_base); -EXPORT_SYMBOL(pfn_base); EXPORT_SYMBOL(sparc64_valid_addr_bitmap); -EXPORT_SYMBOL(page_to_pfn); -EXPORT_SYMBOL(pfn_to_page); /* No version information on this, heavily used in inline asm, * and will always be 'void __ret_efault(void)'. @@ -392,4 +389,9 @@ EXPORT_SYMBOL(xor_vis_3); EXPORT_SYMBOL(xor_vis_4); EXPORT_SYMBOL(xor_vis_5); +EXPORT_SYMBOL(xor_niagara_2); +EXPORT_SYMBOL(xor_niagara_3); +EXPORT_SYMBOL(xor_niagara_4); +EXPORT_SYMBOL(xor_niagara_5); + EXPORT_SYMBOL(prom_palette); diff --git a/arch/sparc64/kernel/sun4v_ivec.S b/arch/sparc64/kernel/sun4v_ivec.S new file mode 100644 index 0000000..b49a68b --- /dev/null +++ b/arch/sparc64/kernel/sun4v_ivec.S @@ -0,0 +1,334 @@ +/* sun4v_ivec.S: Sun4v interrupt vector handling. + * + * Copyright (C) 2006 <davem@davemloft.net> + */ + +#include <asm/cpudata.h> +#include <asm/intr_queue.h> + + .text + .align 32 + +sun4v_cpu_mondo: + /* Head offset in %g2, tail offset in %g4. + * If they are the same, no work. + */ + mov INTRQ_CPU_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_CPU_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_cpu_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get CPU mondo queue base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 + + /* Now get the cross-call arguments and handler PC, same + * layout as sun4u: + * + * 1st 64-bit word: low half is 32-bit PC, put into %g3 and jmpl to it + * high half is context arg to MMU flushes, into %g5 + * 2nd 64-bit word: 64-bit arg, load into %g1 + * 3rd 64-bit word: 64-bit arg, load into %g7 + */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x8, %g2 + srlx %g3, 32, %g5 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + add %g2, 0x8, %g2 + srl %g3, 0, %g3 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g7 + add %g2, 0x40 - 0x8 - 0x8, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_CPU_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + jmpl %g3, %g0 + nop + +sun4v_cpu_mondo_queue_empty: + retry + +sun4v_dev_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_DEVICE_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_DEVICE_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_dev_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get DEV mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5 + + /* Load IVEC into %g3. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x40, %g2 + + /* XXX There can be a full 64-byte block of data here. + * XXX This is how we can get at MSI vector data. + * XXX Current we do not capture this, but when we do we'll + * XXX need to add a 64-byte storage area in the struct ino_bucket + * XXX or the struct irq_desc. + */ + + /* Update queue head pointer, this frees up some registers. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_DEVICE_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Get &__irq_work[smp_processor_id()] into %g1. */ + TRAP_LOAD_IRQ_WORK(%g1, %g4) + + /* Get &ivector_table[IVEC] into %g4. */ + sethi %hi(ivector_table), %g4 + sllx %g3, 5, %g3 + or %g4, %lo(ivector_table), %g4 + add %g4, %g3, %g4 + + /* Load IRQ %pil into %g5. */ + ldub [%g4 + 0x04], %g5 + + /* Insert ivector_table[] entry into __irq_work[] queue. */ + sllx %g5, 2, %g3 + lduw [%g1 + %g3], %g2 /* g2 = irq_work(cpu, pil) */ + stw %g2, [%g4 + 0x00] /* bucket->irq_chain = g2 */ + stw %g4, [%g1 + %g3] /* irq_work(cpu, pil) = bucket */ + + /* Signal the interrupt by setting (1 << pil) in %softint. */ + mov 1, %g2 + sllx %g2, %g5, %g2 + wr %g2, 0x0, %set_softint + +sun4v_dev_mondo_queue_empty: + retry + +sun4v_res_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_RESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_res_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_res_mondo_queue_full + nop + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_RESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, 15, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_resum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_res_mondo_queue_empty: + retry + +sun4v_res_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + call sun4v_resum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_NONRESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_nonres_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_nonres_mondo_queue_full + nop + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_NONRESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, 15, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_nonresum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo_queue_empty: + retry + +sun4v_nonres_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + call sun4v_nonresum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S new file mode 100644 index 0000000..ab23ddb --- /dev/null +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -0,0 +1,421 @@ +/* sun4v_tlb_miss.S: Sun4v TLB miss handlers. + * + * Copyright (C) 2006 <davem@davemloft.net> + */ + + .text + .align 32 + + /* Load ITLB fault information into VADDR and CTX, using BASE. */ +#define LOAD_ITLB_INFO(BASE, VADDR, CTX) \ + ldx [BASE + HV_FAULT_I_ADDR_OFFSET], VADDR; \ + ldx [BASE + HV_FAULT_I_CTX_OFFSET], CTX; + + /* Load DTLB fault information into VADDR and CTX, using BASE. */ +#define LOAD_DTLB_INFO(BASE, VADDR, CTX) \ + ldx [BASE + HV_FAULT_D_ADDR_OFFSET], VADDR; \ + ldx [BASE + HV_FAULT_D_CTX_OFFSET], CTX; + + /* DEST = (VADDR >> 22) + * + * Branch to ZERO_CTX_LABEL if context is zero. + */ +#define COMPUTE_TAG_TARGET(DEST, VADDR, CTX, ZERO_CTX_LABEL) \ + srlx VADDR, 22, DEST; \ + brz,pn CTX, ZERO_CTX_LABEL; \ + nop; + + /* Create TSB pointer. This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ +#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, TMP1, TMP2) \ + and TSB_PTR, 0x7, TMP1; \ + mov 512, TMP2; \ + andn TSB_PTR, 0x7, TSB_PTR; \ + sllx TMP2, TMP1, TMP2; \ + srlx VADDR, PAGE_SHIFT, TMP1; \ + sub TMP2, 1, TMP2; \ + and TMP1, TMP2, TMP1; \ + sllx TMP1, 4, TMP1; \ + add TSB_PTR, TMP1, TSB_PTR; + +sun4v_itlb_miss: + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + + /* Load UTSB reg into %g1. */ + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + + LOAD_ITLB_INFO(%g2, %g4, %g5) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v) + COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_ITLB, %g3 + andcc %g3, _PAGE_EXEC_4V, %g0 + be,a,pn %xcc, tsb_do_fault + mov FAULT_CODE_ITLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * I-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + */ +sun4v_itlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_IMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + brnz,pn %o0, sun4v_itlb_error + mov %g2, %o1 ! restore %o1 + mov %g1, %o0 ! restore %o0 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_miss: + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + + /* Load UTSB reg into %g1. */ + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + + LOAD_DTLB_INFO(%g2, %g4, %g5) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v) + COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7) + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_DTLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * D-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + */ +sun4v_dtlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_DMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + brnz,pn %o0, sun4v_dtlb_error + mov %g2, %o1 ! restore %o1 + mov %g1, %o0 ! restore %o0 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_prot: + SET_GL(1) + + /* Load MMU Miss base into %g5. */ + ldxa [%g0] ASI_SCRATCHPAD, %g5 + + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 + rdpr %tl, %g1 + cmp %g1, 1 + bgu,pn %xcc, winfix_trampoline + nop + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + + /* Called from trap table: + * %g4: vaddr + * %g5: context + * %g6: TAG TARGET + */ +sun4v_itsb_miss: + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + brz,pn %g5, kvmap_itlb_4v + mov FAULT_CODE_ITLB, %g3 + ba,a,pt %xcc, sun4v_tsb_miss_common + + /* Called from trap table: + * %g4: vaddr + * %g5: context + * %g6: TAG TARGET + */ +sun4v_dtsb_miss: + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + brz,pn %g5, kvmap_dtlb_4v + mov FAULT_CODE_DTLB, %g3 + + /* fallthrough */ + + /* Create TSB pointer into %g1. This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ +sun4v_tsb_miss_common: + COMPUTE_TSB_PTR(%g1, %g4, %g5, %g7) + + /* Branch directly to page table lookup. We have SCRATCHPAD_MMU_MISS + * still in %g2, so it's quite trivial to get at the PGD PHYS value + * so we can preload it into %g7. + */ + sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath + ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 + +sun4v_itlb_error: + sethi %hi(sun4v_err_itlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] + sethi %hi(sun4v_err_itlb_ctx), %g1 + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 + stx %o1, [%g1 + %lo(sun4v_err_itlb_ctx)] + sethi %hi(sun4v_err_itlb_pte), %g1 + stx %g3, [%g1 + %lo(sun4v_err_itlb_pte)] + sethi %hi(sun4v_err_itlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + + rdpr %tl, %g4 + cmp %g4, 1 + ble,pt %icc, 1f + sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 + or %g7, %lo(2f), %g7 + +1: ba,pt %xcc, etrap +2: or %g7, %lo(2b), %g7 + call sun4v_itlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + +sun4v_dtlb_error: + sethi %hi(sun4v_err_dtlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] + sethi %hi(sun4v_err_dtlb_ctx), %g1 + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 + stx %o1, [%g1 + %lo(sun4v_err_dtlb_ctx)] + sethi %hi(sun4v_err_dtlb_pte), %g1 + stx %g3, [%g1 + %lo(sun4v_err_dtlb_pte)] + sethi %hi(sun4v_err_dtlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + + rdpr %tl, %g4 + cmp %g4, 1 + ble,pt %icc, 1f + sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 + or %g7, %lo(2f), %g7 + +1: ba,pt %xcc, etrap +2: or %g7, %lo(2b), %g7 + call sun4v_dtlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + + /* Instruction Access Exception, tl0. */ +sun4v_iacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_insn_access_exception + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Instruction Access Exception, tl1. */ +sun4v_iacc_tl1: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etraptl1 + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_insn_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Data Access Exception, tl0. */ +sun4v_dacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_data_access_exception + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Data Access Exception, tl1. */ +sun4v_dacc_tl1: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etraptl1 + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_data_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Memory Address Unaligned. */ +sun4v_mna: + /* Window fixup? */ + rdpr %tl, %g2 + cmp %g2, 1 + ble,pt %icc, 1f + nop + + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5 + mov HV_FAULT_TYPE_UNALIGNED, %g3 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g4 + sllx %g3, 16, %g3 + or %g4, %g3, %g4 + ba,pt %xcc, winfix_mna + rdpr %tpc, %g3 + /* not reached */ + +1: ldxa [%g0] ASI_SCRATCHPAD, %g2 + mov HV_FAULT_TYPE_UNALIGNED, %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_do_mna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Privileged Action. */ +sun4v_privact: + ba,pt %xcc, etrap + rd %pc, %g7 + call do_privact + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Unaligned ldd float, tl0. */ +sun4v_lddfmna: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_lddfmna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + + /* Unaligned std float, tl0. */ +sun4v_stdfmna: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_stdfmna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define SUN4V_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl sun4v_patch_tlb_handlers + .type sun4v_patch_tlb_handlers,#function +sun4v_patch_tlb_handlers: + SUN4V_DO_PATCH(tl0_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl1_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl0_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl1_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl0_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl1_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl0_iax, sun4v_iacc) + SUN4V_DO_PATCH(tl1_iax, sun4v_iacc_tl1) + SUN4V_DO_PATCH(tl0_dax, sun4v_dacc) + SUN4V_DO_PATCH(tl1_dax, sun4v_dacc_tl1) + SUN4V_DO_PATCH(tl0_mna, sun4v_mna) + SUN4V_DO_PATCH(tl1_mna, sun4v_mna) + SUN4V_DO_PATCH(tl0_lddfmna, sun4v_lddfmna) + SUN4V_DO_PATCH(tl0_stdfmna, sun4v_stdfmna) + SUN4V_DO_PATCH(tl0_privact, sun4v_privact) + retl + nop + .size sun4v_patch_tlb_handlers,.-sun4v_patch_tlb_handlers diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 5f8c822..7a86913 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -25,25 +25,93 @@ #include <linux/syscalls.h> #include <linux/ipc.h> #include <linux/personality.h> +#include <linux/random.h> #include <asm/uaccess.h> #include <asm/ipc.h> #include <asm/utrap.h> #include <asm/perfctr.h> +#include <asm/a.out.h> /* #define DEBUG_UNIMP_SYSCALL */ -/* XXX Make this per-binary type, this way we can detect the type of - * XXX a binary. Every Sparc executable calls this very early on. - */ asmlinkage unsigned long sys_getpagesize(void) { return PAGE_SIZE; } -#define COLOUR_ALIGN(addr,pgoff) \ - ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ - (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) +#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) +#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) + +/* Does addr --> addr+len fall within 4GB of the VA-space hole or + * overflow past the end of the 64-bit address space? + */ +static inline int invalid_64bit_range(unsigned long addr, unsigned long len) +{ + unsigned long va_exclude_start, va_exclude_end; + + va_exclude_start = VA_EXCLUDE_START; + va_exclude_end = VA_EXCLUDE_END; + + if (unlikely(len >= va_exclude_start)) + return 1; + + if (unlikely((addr + len) < addr)) + return 1; + + if (unlikely((addr >= va_exclude_start && addr < va_exclude_end) || + ((addr + len) >= va_exclude_start && + (addr + len) < va_exclude_end))) + return 1; + + return 0; +} + +/* Does start,end straddle the VA-space hole? */ +static inline int straddles_64bit_va_hole(unsigned long start, unsigned long end) +{ + unsigned long va_exclude_start, va_exclude_end; + + va_exclude_start = VA_EXCLUDE_START; + va_exclude_end = VA_EXCLUDE_END; + + if (likely(start < va_exclude_start && end < va_exclude_start)) + return 0; + + if (likely(start >= va_exclude_end && end >= va_exclude_end)) + return 0; + + return 1; +} + +/* These functions differ from the default implementations in + * mm/mmap.c in two ways: + * + * 1) For file backed MAP_SHARED mmap()'s we D-cache color align, + * for fixed such mappings we just validate what the user gave us. + * 2) For 64-bit tasks we avoid mapping anything within 4GB of + * the spitfire/niagara VA-hole. + */ + +static inline unsigned long COLOUR_ALIGN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1); + unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1); + + return base + off; +} + +static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = addr & ~(SHMLBA-1); + unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1); + + if (base + off <= addr) + return base + off; + return base - off; +} unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -64,8 +132,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi } if (test_thread_flag(TIF_32BIT)) - task_size = 0xf0000000UL; - if (len > task_size || len > -PAGE_OFFSET) + task_size = STACK_TOP32; + if (unlikely(len > task_size || len >= VA_EXCLUDE_START)) return -ENOMEM; do_color_align = 0; @@ -84,11 +152,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi return addr; } - if (len <= mm->cached_hole_size) { + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; mm->cached_hole_size = 0; - mm->free_area_cache = TASK_UNMAPPED_BASE; } - start_addr = addr = mm->free_area_cache; task_size -= len; @@ -100,11 +169,12 @@ full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ - if (addr < PAGE_OFFSET && -PAGE_OFFSET - len < addr) { - addr = PAGE_OFFSET; - vma = find_vma(mm, PAGE_OFFSET); + if (addr < VA_EXCLUDE_START && + (addr + len) >= VA_EXCLUDE_START) { + addr = VA_EXCLUDE_END; + vma = find_vma(mm, VA_EXCLUDE_END); } - if (task_size < addr) { + if (unlikely(task_size < addr)) { if (start_addr != TASK_UNMAPPED_BASE) { start_addr = addr = TASK_UNMAPPED_BASE; mm->cached_hole_size = 0; @@ -112,7 +182,7 @@ full_search: } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (likely(!vma || addr + len <= vma->vm_start)) { /* * Remember the place where we stopped the search: */ @@ -128,6 +198,121 @@ full_search: } } +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long task_size = STACK_TOP32; + unsigned long addr = addr0; + int do_color_align; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + if (flags & MAP_FIXED) { + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; + } + + if (unlikely(len > task_size)) + return -ENOMEM; + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + if (do_color_align) { + unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff); + + addr = base + len; + } + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = mm->mmap_base-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + /* Try to align mapping such that we align it as much as possible. */ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -171,15 +356,57 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, u return addr; } +/* Essentially the same as PowerPC... */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) { + random_factor = get_random_int(); + if (test_thread_flag(TIF_32BIT)) + random_factor &= ((1 * 1024 * 1024) - 1); + else + random_factor = ((random_factor << PAGE_SHIFT) & + 0xffffffffUL); + } + + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (!test_thread_flag(TIF_32BIT) || + (current->personality & ADDR_COMPAT_LAYOUT) || + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || + sysctl_legacy_va_layout) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + /* We know it's 32-bit */ + unsigned long task_size = STACK_TOP32; + unsigned long gap; + + gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + if (gap < 128 * 1024 * 1024) + gap = 128 * 1024 * 1024; + if (gap > (task_size / 6 * 5)) + gap = (task_size / 6 * 5); + + mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} + asmlinkage unsigned long sparc_brk(unsigned long brk) { /* People could try to be nasty and use ta 0x6d in 32bit programs */ - if (test_thread_flag(TIF_32BIT) && - brk >= 0xf0000000UL) + if (test_thread_flag(TIF_32BIT) && brk >= STACK_TOP32) return current->mm->brk; - if ((current->mm->brk & PAGE_OFFSET) != (brk & PAGE_OFFSET)) + if (unlikely(straddles_64bit_va_hole(current->mm->brk, brk))) return current->mm->brk; + return sys_brk(brk); } @@ -340,13 +567,16 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, retval = -EINVAL; if (test_thread_flag(TIF_32BIT)) { - if (len > 0xf0000000UL || - ((flags & MAP_FIXED) && addr > 0xf0000000UL - len)) + if (len >= STACK_TOP32) + goto out_putf; + + if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len) goto out_putf; } else { - if (len > -PAGE_OFFSET || - ((flags & MAP_FIXED) && - addr < PAGE_OFFSET && addr + len > -PAGE_OFFSET)) + if (len >= VA_EXCLUDE_START) + goto out_putf; + + if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len)) goto out_putf; } @@ -365,9 +595,9 @@ asmlinkage long sys64_munmap(unsigned long addr, size_t len) { long ret; - if (len > -PAGE_OFFSET || - (addr < PAGE_OFFSET && addr + len > -PAGE_OFFSET)) + if (invalid_64bit_range(addr, len)) return -EINVAL; + down_write(¤t->mm->mmap_sem); ret = do_munmap(current->mm, addr, len); up_write(¤t->mm->mmap_sem); @@ -384,18 +614,19 @@ asmlinkage unsigned long sys64_mremap(unsigned long addr, { struct vm_area_struct *vma; unsigned long ret = -EINVAL; + if (test_thread_flag(TIF_32BIT)) goto out; - if (old_len > -PAGE_OFFSET || new_len > -PAGE_OFFSET) + if (unlikely(new_len >= VA_EXCLUDE_START)) goto out; - if (addr < PAGE_OFFSET && addr + old_len > -PAGE_OFFSET) + if (unlikely(invalid_64bit_range(addr, old_len))) goto out; + down_write(¤t->mm->mmap_sem); if (flags & MREMAP_FIXED) { - if (new_addr < PAGE_OFFSET && - new_addr + new_len > -PAGE_OFFSET) + if (invalid_64bit_range(new_addr, new_len)) goto out_sem; - } else if (addr < PAGE_OFFSET && addr + new_len > -PAGE_OFFSET) { + } else if (invalid_64bit_range(addr, new_len)) { unsigned long map_flags = 0; struct file *file = NULL; @@ -554,12 +785,10 @@ asmlinkage long sys_utrap_install(utrap_entry_t type, } if (!current_thread_info()->utraps) { current_thread_info()->utraps = - kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL); + kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL); if (!current_thread_info()->utraps) return -ENOMEM; current_thread_info()->utraps[0] = 1; - memset(current_thread_info()->utraps+1, 0, - UT_TRAP_INSTRUCTION_31*sizeof(long)); } else { if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p && current_thread_info()->utraps[0] > 1) { diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 417727b..0e41df0 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -62,6 +62,7 @@ #include <asm/fpumacro.h> #include <asm/semaphore.h> #include <asm/mmu_context.h> +#include <asm/a.out.h> asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) { @@ -1039,15 +1040,15 @@ asmlinkage unsigned long sys32_mremap(unsigned long addr, unsigned long ret = -EINVAL; unsigned long new_addr = __new_addr; - if (old_len > 0xf0000000UL || new_len > 0xf0000000UL) + if (old_len > STACK_TOP32 || new_len > STACK_TOP32) goto out; - if (addr > 0xf0000000UL - old_len) + if (addr > STACK_TOP32 - old_len) goto out; down_write(¤t->mm->mmap_sem); if (flags & MREMAP_FIXED) { - if (new_addr > 0xf0000000UL - new_len) + if (new_addr > STACK_TOP32 - new_len) goto out_sem; - } else if (addr > 0xf0000000UL - new_len) { + } else if (addr > STACK_TOP32 - new_len) { unsigned long map_flags = 0; struct file *file = NULL; diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index a22930d..7d61f1b 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -30,6 +30,8 @@ #include <linux/cpufreq.h> #include <linux/percpu.h> #include <linux/profile.h> +#include <linux/miscdevice.h> +#include <linux/rtc.h> #include <asm/oplib.h> #include <asm/mostek.h> @@ -45,6 +47,7 @@ #include <asm/smp.h> #include <asm/sections.h> #include <asm/cpudata.h> +#include <asm/uaccess.h> DEFINE_SPINLOCK(mostek_lock); DEFINE_SPINLOCK(rtc_lock); @@ -193,16 +196,22 @@ struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations; static void stick_init_tick(unsigned long offset) { - tick_disable_protection(); - - /* Let the user get at STICK too. */ - __asm__ __volatile__( - " rd %%asr24, %%g2\n" - " andn %%g2, %0, %%g2\n" - " wr %%g2, 0, %%asr24" - : /* no outputs */ - : "r" (TICK_PRIV_BIT) - : "g1", "g2"); + /* Writes to the %tick and %stick register are not + * allowed on sun4v. The Hypervisor controls that + * bit, per-strand. + */ + if (tlb_type != hypervisor) { + tick_disable_protection(); + + /* Let the user get at STICK too. */ + __asm__ __volatile__( + " rd %%asr24, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wr %%g2, 0, %%asr24" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g1", "g2"); + } __asm__ __volatile__( " rd %%asr24, %%g1\n" @@ -683,6 +692,83 @@ static void __init set_system_time(void) } } +/* davem suggests we keep this within the 4M locked kernel image */ +static u32 starfire_get_time(void) +{ + static char obp_gettod[32]; + static u32 unix_tod; + + sprintf(obp_gettod, "h# %08x unix-gettod", + (unsigned int) (long) &unix_tod); + prom_feval(obp_gettod); + + return unix_tod; +} + +static int starfire_set_time(u32 val) +{ + /* Do nothing, time is set using the service processor + * console on this platform. + */ + return 0; +} + +static u32 hypervisor_get_time(void) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + int retries = 10000; + +retry: + func = HV_FAST_TOD_GET; + arg0 = 0; + arg1 = 0; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); + if (arg0 == HV_EOK) + return arg1; + if (arg0 == HV_EWOULDBLOCK) { + if (--retries > 0) { + udelay(100); + goto retry; + } + printk(KERN_WARNING "SUN4V: tod_get() timed out.\n"); + return 0; + } + printk(KERN_WARNING "SUN4V: tod_get() not supported.\n"); + return 0; +} + +static int hypervisor_set_time(u32 secs) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + int retries = 10000; + +retry: + func = HV_FAST_TOD_SET; + arg0 = secs; + __asm__ __volatile__("ta %4" + : "=&r" (func), "=&r" (arg0) + : "0" (func), "1" (arg0), + "i" (HV_FAST_TRAP)); + if (arg0 == HV_EOK) + return 0; + if (arg0 == HV_EWOULDBLOCK) { + if (--retries > 0) { + udelay(100); + goto retry; + } + printk(KERN_WARNING "SUN4V: tod_set() timed out.\n"); + return -EAGAIN; + } + printk(KERN_WARNING "SUN4V: tod_set() not supported.\n"); + return -EOPNOTSUPP; +} + void __init clock_probe(void) { struct linux_prom_registers clk_reg[2]; @@ -702,14 +788,14 @@ void __init clock_probe(void) if (this_is_starfire) { - /* davem suggests we keep this within the 4M locked kernel image */ - static char obp_gettod[256]; - static u32 unix_tod; - - sprintf(obp_gettod, "h# %08x unix-gettod", - (unsigned int) (long) &unix_tod); - prom_feval(obp_gettod); - xtime.tv_sec = unix_tod; + xtime.tv_sec = starfire_get_time(); + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + return; + } + if (tlb_type == hypervisor) { + xtime.tv_sec = hypervisor_get_time(); xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); @@ -981,11 +1067,10 @@ static void sparc64_start_timers(irqreturn_t (*cfunc)(int, void *, struct pt_reg } struct freq_table { - unsigned long udelay_val_ref; unsigned long clock_tick_ref; unsigned int ref_freq; }; -static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0, 0 }; +static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 }; unsigned long sparc64_get_clock_tick(unsigned int cpu) { @@ -1007,16 +1092,11 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val if (!ft->ref_freq) { ft->ref_freq = freq->old; - ft->udelay_val_ref = cpu_data(cpu).udelay_val; ft->clock_tick_ref = cpu_data(cpu).clock_tick; } if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || (val == CPUFREQ_RESUMECHANGE)) { - cpu_data(cpu).udelay_val = - cpufreq_scale(ft->udelay_val_ref, - ft->ref_freq, - freq->new); cpu_data(cpu).clock_tick = cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, @@ -1179,3 +1259,246 @@ static int set_rtc_mmss(unsigned long nowtime) return retval; } } + +#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */ +static unsigned char mini_rtc_status; /* bitmapped status byte. */ + +/* months start at 0 now */ +static unsigned char days_in_mo[] = +{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +#define FEBRUARY 2 +#define STARTOFTIME 1970 +#define SECDAY 86400L +#define SECYR (SECDAY * 365) +#define leapyear(year) ((year) % 4 == 0 && \ + ((year) % 100 != 0 || (year) % 400 == 0)) +#define days_in_year(a) (leapyear(a) ? 366 : 365) +#define days_in_month(a) (month_days[(a) - 1]) + +static int month_days[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) + */ +static void GregorianDay(struct rtc_time * tm) +{ + int leapsToDate; + int lastYear; + int day; + int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; + + lastYear = tm->tm_year - 1; + + /* + * Number of leap corrections to apply up to end of last year + */ + leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; + + /* + * This year is a leap year if it is divisible by 4 except when it is + * divisible by 100 unless it is divisible by 400 + * + * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was + */ + day = tm->tm_mon > 2 && leapyear(tm->tm_year); + + day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + + tm->tm_mday; + + tm->tm_wday = day % 7; +} + +static void to_tm(int tim, struct rtc_time *tm) +{ + register int i; + register long hms, day; + + day = tim / SECDAY; + hms = tim % SECDAY; + + /* Hours, minutes, seconds are easy */ + tm->tm_hour = hms / 3600; + tm->tm_min = (hms % 3600) / 60; + tm->tm_sec = (hms % 3600) % 60; + + /* Number of years in days */ + for (i = STARTOFTIME; day >= days_in_year(i); i++) + day -= days_in_year(i); + tm->tm_year = i; + + /* Number of months in days left */ + if (leapyear(tm->tm_year)) + days_in_month(FEBRUARY) = 29; + for (i = 1; day >= days_in_month(i); i++) + day -= days_in_month(i); + days_in_month(FEBRUARY) = 28; + tm->tm_mon = i; + + /* Days are what is left over (+1) from all that. */ + tm->tm_mday = day + 1; + + /* + * Determine the day of week + */ + GregorianDay(tm); +} + +/* Both Starfire and SUN4V give us seconds since Jan 1st, 1970, + * aka Unix time. So we have to convert to/from rtc_time. + */ +static inline void mini_get_rtc_time(struct rtc_time *time) +{ + unsigned long flags; + u32 seconds; + + spin_lock_irqsave(&rtc_lock, flags); + seconds = 0; + if (this_is_starfire) + seconds = starfire_get_time(); + else if (tlb_type == hypervisor) + seconds = hypervisor_get_time(); + spin_unlock_irqrestore(&rtc_lock, flags); + + to_tm(seconds, time); + time->tm_year -= 1900; + time->tm_mon -= 1; +} + +static inline int mini_set_rtc_time(struct rtc_time *time) +{ + u32 seconds = mktime(time->tm_year + 1900, time->tm_mon + 1, + time->tm_mday, time->tm_hour, + time->tm_min, time->tm_sec); + unsigned long flags; + int err; + + spin_lock_irqsave(&rtc_lock, flags); + err = -ENODEV; + if (this_is_starfire) + err = starfire_set_time(seconds); + else if (tlb_type == hypervisor) + err = hypervisor_set_time(seconds); + spin_unlock_irqrestore(&rtc_lock, flags); + + return err; +} + +static int mini_rtc_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct rtc_time wtime; + void __user *argp = (void __user *)arg; + + switch (cmd) { + + case RTC_PLL_GET: + return -EINVAL; + + case RTC_PLL_SET: + return -EINVAL; + + case RTC_UIE_OFF: /* disable ints from RTC updates. */ + return 0; + + case RTC_UIE_ON: /* enable ints for RTC updates. */ + return -EINVAL; + + case RTC_RD_TIME: /* Read the time/date from RTC */ + /* this doesn't get week-day, who cares */ + memset(&wtime, 0, sizeof(wtime)); + mini_get_rtc_time(&wtime); + + return copy_to_user(argp, &wtime, sizeof(wtime)) ? -EFAULT : 0; + + case RTC_SET_TIME: /* Set the RTC */ + { + int year; + unsigned char leap_yr; + + if (!capable(CAP_SYS_TIME)) + return -EACCES; + + if (copy_from_user(&wtime, argp, sizeof(wtime))) + return -EFAULT; + + year = wtime.tm_year + 1900; + leap_yr = ((!(year % 4) && (year % 100)) || + !(year % 400)); + + if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1)) + return -EINVAL; + + if (wtime.tm_mday < 0 || wtime.tm_mday > + (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr))) + return -EINVAL; + + if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 || + wtime.tm_min < 0 || wtime.tm_min >= 60 || + wtime.tm_sec < 0 || wtime.tm_sec >= 60) + return -EINVAL; + + return mini_set_rtc_time(&wtime); + } + } + + return -EINVAL; +} + +static int mini_rtc_open(struct inode *inode, struct file *file) +{ + if (mini_rtc_status & RTC_IS_OPEN) + return -EBUSY; + + mini_rtc_status |= RTC_IS_OPEN; + + return 0; +} + +static int mini_rtc_release(struct inode *inode, struct file *file) +{ + mini_rtc_status &= ~RTC_IS_OPEN; + return 0; +} + + +static struct file_operations mini_rtc_fops = { + .owner = THIS_MODULE, + .ioctl = mini_rtc_ioctl, + .open = mini_rtc_open, + .release = mini_rtc_release, +}; + +static struct miscdevice rtc_mini_dev = +{ + .minor = RTC_MINOR, + .name = "rtc", + .fops = &mini_rtc_fops, +}; + +static int __init rtc_mini_init(void) +{ + int retval; + + if (tlb_type != hypervisor && !this_is_starfire) + return -ENODEV; + + printk(KERN_INFO "Mini RTC Driver\n"); + + retval = misc_register(&rtc_mini_dev); + if (retval < 0) + return retval; + + return 0; +} + +static void __exit rtc_mini_exit(void) +{ + misc_deregister(&rtc_mini_dev); +} + + +module_init(rtc_mini_init); +module_exit(rtc_mini_exit); diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 9478551..a4dc01a 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -16,6 +16,8 @@ #include <asm/processor.h> #include <asm/thread_info.h> #include <asm/mmu.h> +#include <asm/hypervisor.h> +#include <asm/cpudata.h> .data .align 8 @@ -28,14 +30,19 @@ itlb_load: dtlb_load: .asciz "SUNW,dtlb-load" + /* XXX __cpuinit this thing XXX */ +#define TRAMP_STACK_SIZE 1024 + .align 16 +tramp_stack: + .skip TRAMP_STACK_SIZE + .text .align 8 .globl sparc64_cpu_startup, sparc64_cpu_startup_end sparc64_cpu_startup: - flushw - - BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_startup) - BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_startup) + BRANCH_IF_SUN4V(g1, niagara_startup) + BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) ba,pt %xcc, spitfire_startup nop @@ -55,6 +62,7 @@ cheetah_startup: or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 stxa %g5, [%g0] ASI_DCU_CONTROL_REG membar #Sync + /* fallthru */ cheetah_generic_startup: mov TSB_EXTENSION_P, %g3 @@ -70,7 +78,9 @@ cheetah_generic_startup: stxa %g0, [%g3] ASI_DMMU stxa %g0, [%g3] ASI_IMMU membar #Sync + /* fallthru */ +niagara_startup: /* Disable STICK_INT interrupts. */ sethi %hi(0x80000000), %g5 sllx %g5, 32, %g5 @@ -85,17 +95,17 @@ spitfire_startup: membar #Sync startup_continue: - wrpr %g0, 15, %pil - sethi %hi(0x80000000), %g2 sllx %g2, 32, %g2 wr %g2, 0, %tick_cmpr + mov %o0, %l0 + + BRANCH_IF_SUN4V(g1, niagara_lock_tlb) + /* Call OBP by hand to lock KERNBASE into i/d tlbs. * We lock 2 consequetive entries if we are 'bigkernel'. */ - mov %o0, %l0 - sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 membar #StoreLoad | #StoreStore @@ -105,7 +115,6 @@ startup_continue: sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 ldx [%g2 + 0x10], %l2 - mov %sp, %l1 add %l2, -(192 + 128), %sp flushw @@ -142,8 +151,7 @@ startup_continue: sethi %hi(bigkernel), %g2 lduw [%g2 + %lo(bigkernel)], %g2 - cmp %g2, 0 - be,pt %icc, do_dtlb + brz,pt %g2, do_dtlb nop sethi %hi(call_method), %g2 @@ -214,8 +222,7 @@ do_dtlb: sethi %hi(bigkernel), %g2 lduw [%g2 + %lo(bigkernel)], %g2 - cmp %g2, 0 - be,pt %icc, do_unlock + brz,pt %g2, do_unlock nop sethi %hi(call_method), %g2 @@ -257,99 +264,180 @@ do_unlock: stb %g0, [%g2 + %lo(prom_entry_lock)] membar #StoreStore | #StoreLoad - mov %l1, %sp - flushw + ba,pt %xcc, after_lock_tlb + nop + +niagara_lock_tlb: + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + mov HV_MMU_IMMU, %o3 + ta HV_FAST_TRAP + + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + mov HV_MMU_DMMU, %o3 + ta HV_FAST_TRAP - mov %l0, %o0 + sethi %hi(bigkernel), %g2 + lduw [%g2 + %lo(bigkernel)], %g2 + brz,pt %g2, after_lock_tlb + nop + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE + 0x400000), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + sethi %hi(0x400000), %o3 + add %o2, %o3, %o2 + mov HV_MMU_IMMU, %o3 + ta HV_FAST_TRAP + + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sethi %hi(KERNBASE + 0x400000), %o0 + clr %o1 + sethi %hi(kern_locked_tte_data), %o2 + ldx [%o2 + %lo(kern_locked_tte_data)], %o2 + sethi %hi(0x400000), %o3 + add %o2, %o3, %o2 + mov HV_MMU_DMMU, %o3 + ta HV_FAST_TRAP + +after_lock_tlb: wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate wr %g0, 0, %fprs - /* XXX Buggy PROM... */ - srl %o0, 0, %o0 - ldx [%o0], %g6 - wr %g0, ASI_P, %asi mov PRIMARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU + +661: stxa %g0, [%g7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g0, [%g7] ASI_MMU + .previous + membar #Sync mov SECONDARY_CONTEXT, %g7 - stxa %g0, [%g7] ASI_DMMU + +661: stxa %g0, [%g7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g0, [%g7] ASI_MMU + .previous + membar #Sync - mov 1, %g5 - sllx %g5, THREAD_SHIFT, %g5 - sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 - add %g6, %g5, %sp + /* Everything we do here, until we properly take over the + * trap table, must be done with extreme care. We cannot + * make any references to %g6 (current thread pointer), + * %g4 (current task pointer), or %g5 (base of current cpu's + * per-cpu area) until we properly take over the trap table + * from the firmware and hypervisor. + * + * Get onto temporary stack which is in the locked kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS, %sp mov 0, %fp - wrpr %g0, 0, %wstate - wrpr %g0, 0, %tl + /* Put garbage in these registers to trap any access to them. */ + set 0xdeadbeef, %g4 + set 0xdeadbeef, %g5 + set 0xdeadbeef, %g6 - /* Setup the trap globals, then we can resurface. */ - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate - sethi %hi(sparc64_ttable_tl0), %g5 - wrpr %g5, %tba - mov %o2, %g6 - - wrpr %o1, PSTATE_MG, %pstate -#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000) -#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W) - - mov TSB_REG, %g1 - stxa %g0, [%g1] ASI_DMMU - membar #Sync - mov TLB_SFSR, %g1 - sethi %uhi(KERN_HIGHBITS), %g2 - or %g2, %ulo(KERN_HIGHBITS), %g2 - sllx %g2, 32, %g2 - or %g2, KERN_LOWBITS, %g2 + call init_irqwork_curcpu + nop - BRANCH_IF_ANY_CHEETAH(g3,g7,9f) + sethi %hi(tlb_type), %g3 + lduw [%g3 + %lo(tlb_type)], %g2 + cmp %g2, 3 + bne,pt %icc, 1f + nop - ba,pt %xcc, 1f + call hard_smp_processor_id nop + + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 + call sun4v_init_mondo_queues + mov 1, %o3 -9: - sethi %uhi(VPTE_BASE_CHEETAH), %g3 - or %g3, %ulo(VPTE_BASE_CHEETAH), %g3 - ba,pt %xcc, 2f - sllx %g3, 32, %g3 -1: - sethi %uhi(VPTE_BASE_SPITFIRE), %g3 - or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3 - sllx %g3, 32, %g3 +1: call init_cur_cpu_trap + ldx [%l0], %o0 + + /* Start using proper page size encodings in ctx register. */ + sethi %hi(sparc64_kern_pri_context), %g3 + ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 -2: - clr %g7 -#undef KERN_HIGHBITS -#undef KERN_LOWBITS +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous - wrpr %o1, 0x0, %pstate - ldx [%g6 + TI_TASK], %g4 + membar #Sync wrpr %g0, 0, %wstate - call init_irqwork_curcpu + /* As a hack, put &init_thread_union into %g6. + * prom_world() loads from here to restore the %asi + * register. + */ + sethi %hi(init_thread_union), %g6 + or %g6, %lo(init_thread_union), %g6 + + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f nop - /* Start using proper page size encodings in ctx register. */ - sethi %hi(sparc64_kern_pri_context), %g3 - ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 - mov PRIMARY_CONTEXT, %g1 - stxa %g2, [%g1] ASI_DMMU - membar #Sync + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + /* Compute physical address: + * + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) + */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 + + call prom_set_trap_table_sun4v + sethi %hi(sparc64_ttable_tl0), %o0 + + ba,pt %xcc, 2f + nop + +1: call prom_set_trap_table + sethi %hi(sparc64_ttable_tl0), %o0 + +2: ldx [%l0], %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 wrpr %o1, 0, %pstate - call prom_set_trap_table - sethi %hi(sparc64_ttable_tl0), %o0 - call smp_callin nop call cpu_idle diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 8d44ae5..7f7dba0 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -38,6 +38,7 @@ #include <asm/processor.h> #include <asm/timer.h> #include <asm/kdebug.h> +#include <asm/head.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> #endif @@ -72,12 +73,14 @@ struct tl1_traplog { static void dump_tl1_traplog(struct tl1_traplog *p) { - int i; + int i, limit; + + printk(KERN_EMERG "TRAPLOG: Error at trap level 0x%lx, " + "dumping track stack.\n", p->tl); - printk("TRAPLOG: Error at trap level 0x%lx, dumping track stack.\n", - p->tl); - for (i = 0; i < 4; i++) { - printk(KERN_CRIT + limit = (tlb_type == hypervisor) ? 2 : 4; + for (i = 0; i < limit; i++) { + printk(KERN_EMERG "TRAPLOG: Trap level %d TSTATE[%016lx] TPC[%016lx] " "TNPC[%016lx] TT[%lx]\n", i + 1, @@ -179,6 +182,45 @@ void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr spitfire_insn_access_exception(regs, sfsr, sfar); } +void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + unsigned short type = (type_ctx >> 16); + unsigned short ctx = (type_ctx & 0xffff); + siginfo_t info; + + if (notify_die(DIE_TRAP, "instruction access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + printk("sun4v_insn_access_exception: ADDR[%016lx] " + "CTX[%04x] TYPE[%04x], going.\n", + addr, ctx, type); + die_if_kernel("Iax", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + sun4v_insn_access_exception(regs, addr, type_ctx); +} + void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { siginfo_t info; @@ -227,6 +269,45 @@ void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr spitfire_data_access_exception(regs, sfsr, sfar); } +void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + unsigned short type = (type_ctx >> 16); + unsigned short ctx = (type_ctx & 0xffff); + siginfo_t info; + + if (notify_die(DIE_TRAP, "data access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + printk("sun4v_data_access_exception: ADDR[%016lx] " + "CTX[%04x] TYPE[%04x], going.\n", + addr, ctx, type); + die_if_kernel("Dax", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + sun4v_data_access_exception(regs, addr, type_ctx); +} + #ifdef CONFIG_PCI /* This is really pathetic... */ extern volatile int pci_poke_in_progress; @@ -788,7 +869,8 @@ void __init cheetah_ecache_flush_init(void) cheetah_error_log[i].afsr = CHAFSR_INVALID; __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { + if ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID) { cheetah_error_table = &__jalapeno_error_table[0]; cheetah_afsr_errors = JPAFSR_ERRORS; } else if ((ver >> 32) == 0x003e0015) { @@ -1666,6 +1748,238 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) regs->tpc); } +struct sun4v_error_entry { + u64 err_handle; + u64 err_stick; + + u32 err_type; +#define SUN4V_ERR_TYPE_UNDEFINED 0 +#define SUN4V_ERR_TYPE_UNCORRECTED_RES 1 +#define SUN4V_ERR_TYPE_PRECISE_NONRES 2 +#define SUN4V_ERR_TYPE_DEFERRED_NONRES 3 +#define SUN4V_ERR_TYPE_WARNING_RES 4 + + u32 err_attrs; +#define SUN4V_ERR_ATTRS_PROCESSOR 0x00000001 +#define SUN4V_ERR_ATTRS_MEMORY 0x00000002 +#define SUN4V_ERR_ATTRS_PIO 0x00000004 +#define SUN4V_ERR_ATTRS_INT_REGISTERS 0x00000008 +#define SUN4V_ERR_ATTRS_FPU_REGISTERS 0x00000010 +#define SUN4V_ERR_ATTRS_USER_MODE 0x01000000 +#define SUN4V_ERR_ATTRS_PRIV_MODE 0x02000000 +#define SUN4V_ERR_ATTRS_RES_QUEUE_FULL 0x80000000 + + u64 err_raddr; + u32 err_size; + u16 err_cpu; + u16 err_pad; +}; + +static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0); +static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0); + +static const char *sun4v_err_type_to_str(u32 type) +{ + switch (type) { + case SUN4V_ERR_TYPE_UNDEFINED: + return "undefined"; + case SUN4V_ERR_TYPE_UNCORRECTED_RES: + return "uncorrected resumable"; + case SUN4V_ERR_TYPE_PRECISE_NONRES: + return "precise nonresumable"; + case SUN4V_ERR_TYPE_DEFERRED_NONRES: + return "deferred nonresumable"; + case SUN4V_ERR_TYPE_WARNING_RES: + return "warning resumable"; + default: + return "unknown"; + }; +} + +static void sun4v_log_error(struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt) +{ + int cnt; + + printk("%s: Reporting on cpu %d\n", pfx, cpu); + printk("%s: err_handle[%lx] err_stick[%lx] err_type[%08x:%s]\n", + pfx, + ent->err_handle, ent->err_stick, + ent->err_type, + sun4v_err_type_to_str(ent->err_type)); + printk("%s: err_attrs[%08x:%s %s %s %s %s %s %s %s]\n", + pfx, + ent->err_attrs, + ((ent->err_attrs & SUN4V_ERR_ATTRS_PROCESSOR) ? + "processor" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_MEMORY) ? + "memory" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PIO) ? + "pio" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_INT_REGISTERS) ? + "integer-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_FPU_REGISTERS) ? + "fpu-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_USER_MODE) ? + "user" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PRIV_MODE) ? + "privileged" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL) ? + "queue-full" : "")); + printk("%s: err_raddr[%016lx] err_size[%u] err_cpu[%u]\n", + pfx, + ent->err_raddr, ent->err_size, ent->err_cpu); + + if ((cnt = atomic_read(ocnt)) != 0) { + atomic_set(ocnt, 0); + wmb(); + printk("%s: Queue overflowed %d times.\n", + pfx, cnt); + } +} + +/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate. + * Log the event and clear the first word of the entry. + */ +void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->resum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + + sun4v_log_error(&local_copy, cpu, + KERN_ERR "RESUMABLE ERROR", + &sun4v_resum_oflow_cnt); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_resum_overflow(struct pt_regs *regs) +{ + atomic_inc(&sun4v_resum_oflow_cnt); +} + +/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate. + * Log the event, clear the first word of the entry, and die. + */ +void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->nonresum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + +#ifdef CONFIG_PCI + /* Check for the special PCI poke sequence. */ + if (pci_poke_in_progress && pci_poke_cpu == cpu) { + pci_poke_faulted = 1; + regs->tpc += 4; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + sun4v_log_error(&local_copy, cpu, + KERN_EMERG "NON-RESUMABLE ERROR", + &sun4v_nonresum_oflow_cnt); + + panic("Non-resumable error."); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_nonresum_overflow(struct pt_regs *regs) +{ + /* XXX Actually even this can make not that much sense. Perhaps + * XXX we should just pull the plug and panic directly from here? + */ + atomic_inc(&sun4v_nonresum_oflow_cnt); +} + +unsigned long sun4v_err_itlb_vaddr; +unsigned long sun4v_err_itlb_ctx; +unsigned long sun4v_err_itlb_pte; +unsigned long sun4v_err_itlb_error; + +void sun4v_itlb_error_report(struct pt_regs *regs, int tl) +{ + if (tl > 1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); + printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] " + "pte[%lx] error[%lx]\n", + sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, + sun4v_err_itlb_pte, sun4v_err_itlb_error); + + prom_halt(); +} + +unsigned long sun4v_err_dtlb_vaddr; +unsigned long sun4v_err_dtlb_ctx; +unsigned long sun4v_err_dtlb_pte; +unsigned long sun4v_err_dtlb_error; + +void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) +{ + if (tl > 1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); + printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] " + "pte[%lx] error[%lx]\n", + sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, + sun4v_err_dtlb_pte, sun4v_err_dtlb_error); + + prom_halt(); +} + +void hypervisor_tlbop_error(unsigned long err, unsigned long op) +{ + printk(KERN_CRIT "SUN4V: TLB hv call error %lu for op %lu\n", + err, op); +} + +void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op) +{ + printk(KERN_CRIT "SUN4V: XCALL TLB hv call error %lu for op %lu\n", + err, op); +} + void do_fpe_common(struct pt_regs *regs) { if (regs->tstate & TSTATE_PRIV) { @@ -1924,10 +2238,11 @@ void die_if_kernel(char *str, struct pt_regs *regs) } user_instruction_dump ((unsigned int __user *) regs->tpc); } +#if 0 #ifdef CONFIG_SMP smp_report_regs(); #endif - +#endif if (regs->tstate & TSTATE_PRIV) do_exit(SIGKILL); do_exit(SIGSEGV); @@ -1958,6 +2273,11 @@ void do_illegal_instruction(struct pt_regs *regs) } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { if (handle_ldf_stq(insn, regs)) return; + } else if (tlb_type == hypervisor) { + extern int vis_emul(struct pt_regs *, unsigned int); + + if (!vis_emul(regs, insn)) + return; } } info.si_signo = SIGILL; @@ -1968,6 +2288,8 @@ void do_illegal_instruction(struct pt_regs *regs) force_sig_info(SIGILL, &info, current); } +extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn); + void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { siginfo_t info; @@ -1977,13 +2299,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo return; if (regs->tstate & TSTATE_PRIV) { - extern void kernel_unaligned_trap(struct pt_regs *regs, - unsigned int insn, - unsigned long sfar, - unsigned long sfsr); - - kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc), - sfar, sfsr); + kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); return; } info.si_signo = SIGBUS; @@ -1994,6 +2310,26 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo force_sig_info(SIGBUS, &info, current); } +void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "memory address unaligned", regs, + 0, 0x34, SIGSEGV) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); + return; + } + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + void do_privop(struct pt_regs *regs) { siginfo_t info; @@ -2130,7 +2466,22 @@ void do_getpsr(struct pt_regs *regs) } } +struct trap_per_cpu trap_block[NR_CPUS]; + +/* This can get invoked before sched_init() so play it super safe + * and use hard_smp_processor_id(). + */ +void init_cur_cpu_trap(struct thread_info *t) +{ + int cpu = hard_smp_processor_id(); + struct trap_per_cpu *p = &trap_block[cpu]; + + p->thread = t; + p->pgd_paddr = 0; +} + extern void thread_info_offsets_are_bolixed_dave(void); +extern void trap_per_cpu_offsets_are_bolixed_dave(void); /* Only invoked on boot processor. */ void __init trap_init(void) @@ -2154,7 +2505,6 @@ void __init trap_init(void) TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) || TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) || TI_PCR != offsetof(struct thread_info, pcr_reg) || - TI_CEE_STUFF != offsetof(struct thread_info, cee_stuff) || TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) || TI_NEW_CHILD != offsetof(struct thread_info, new_child) || TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) || @@ -2165,6 +2515,29 @@ void __init trap_init(void) (TI_FPREGS & (64 - 1))) thread_info_offsets_are_bolixed_dave(); + if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || + (TRAP_PER_CPU_PGD_PADDR != + offsetof(struct trap_per_cpu, pgd_paddr)) || + (TRAP_PER_CPU_CPU_MONDO_PA != + offsetof(struct trap_per_cpu, cpu_mondo_pa)) || + (TRAP_PER_CPU_DEV_MONDO_PA != + offsetof(struct trap_per_cpu, dev_mondo_pa)) || + (TRAP_PER_CPU_RESUM_MONDO_PA != + offsetof(struct trap_per_cpu, resum_mondo_pa)) || + (TRAP_PER_CPU_RESUM_KBUF_PA != + offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || + (TRAP_PER_CPU_NONRESUM_MONDO_PA != + offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || + (TRAP_PER_CPU_NONRESUM_KBUF_PA != + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || + (TRAP_PER_CPU_FAULT_INFO != + offsetof(struct trap_per_cpu, fault_info)) || + (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || + (TRAP_PER_CPU_CPU_LIST_PA != + offsetof(struct trap_per_cpu, cpu_list_pa))) + trap_per_cpu_offsets_are_bolixed_dave(); + /* Attach to the address space of init_task. On SMP we * do this in smp.c:smp_callin for other cpus. */ diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S new file mode 100644 index 0000000..118baea --- /dev/null +++ b/arch/sparc64/kernel/tsb.S @@ -0,0 +1,442 @@ +/* tsb.S: Sparc64 TSB table handling. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#include <asm/tsb.h> +#include <asm/hypervisor.h> + + .text + .align 32 + + /* Invoked from TLB miss handler, we are in the + * MMU global registers and they are setup like + * this: + * + * %g1: TSB entry pointer + * %g2: available temporary + * %g3: FAULT_CODE_{D,I}TLB + * %g4: available temporary + * %g5: available temporary + * %g6: TAG TARGET + * %g7: available temporary, will be loaded by us with + * the physical address base of the linux page + * tables for the current address space + */ +tsb_miss_dtlb: + mov TLB_TAG_ACCESS, %g4 + ba,pt %xcc, tsb_miss_page_table_walk + ldxa [%g4] ASI_DMMU, %g4 + +tsb_miss_itlb: + mov TLB_TAG_ACCESS, %g4 + ba,pt %xcc, tsb_miss_page_table_walk + ldxa [%g4] ASI_IMMU, %g4 + + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g4 -- missing virtual address + * %g6 -- TAG TARGET (vaddr >> 22) + */ +tsb_miss_page_table_walk: + TRAP_LOAD_PGD_PHYS(%g7, %g5) + + /* And now we have the PGD base physical address in %g7. */ +tsb_miss_page_table_walk_sun4v_fastpath: + USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) + + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g5 -- physical address of PTE in Linux page tables + * %g6 -- TAG TARGET (vaddr >> 22) + */ +tsb_reload: + TSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, tsb_do_fault + TSB_STORE(%g1, %g7) + + TSB_WRITE(%g1, %g5, %g6) + + /* Finally, load TLB and return from trap. */ +tsb_tlb_reload: + cmp %g3, FAULT_CODE_DTLB + bne,pn %xcc, tsb_itlb_load + nop + +tsb_dtlb_load: + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 + +tsb_itlb_load: + /* Executable bit must be set. */ +661: andcc %g5, _PAGE_EXEC_4U, %g0 + .section .sun4v_1insn_patch, "ax" + .word 661b + andcc %g5, _PAGE_EXEC_4V, %g0 + .previous + + be,pn %xcc, tsb_do_fault + nop + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 + + /* No valid entry in the page tables, do full fault + * processing. + */ + + .globl tsb_do_fault +tsb_do_fault: + cmp %g3, FAULT_CODE_DTLB + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g4 + .previous + + bne,pn %xcc, tsb_do_itlb_fault + nop + +tsb_do_dtlb_fault: + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g5 + .section .sun4v_2insn_patch, "ax" + .word 661b + ldx [%g4 + HV_FAULT_D_ADDR_OFFSET], %g5 + nop + .previous + + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline + nop + +tsb_do_itlb_fault: + rdpr %tpc, %g5 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_ITLB, %g4 + + .globl sparc64_realfault_common +sparc64_realfault_common: + /* fault code in %g4, fault address in %g5, etrap will + * preserve these two values in %l4 and %l5 respectively + */ + ba,pt %xcc, etrap ! Save trap state +1: rd %pc, %g7 ! ... + stb %l4, [%g6 + TI_FAULT_CODE] ! Save fault code + stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address + call do_sparc64_fault ! Call fault handler + add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg + ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state + nop ! Delay slot (fill me) + +winfix_trampoline: + rdpr %tpc, %g3 ! Prepare winfixup TNPC + or %g3, 0x7c, %g3 ! Compute branch offset + wrpr %g3, %tnpc ! Write it into TNPC + done ! Trap return + + /* Insert an entry into the TSB. + * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + * %o2: pte + */ + .align 32 + .globl __tsb_insert +__tsb_insert: + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate + TSB_LOCK_TAG(%o0, %g2, %g3) + TSB_WRITE(%o0, %o2, %o1) + wrpr %o5, %pstate + retl + nop + .size __tsb_insert, .-__tsb_insert + + /* Flush the given TSB entry if it has the matching + * tag. + * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + */ + .align 32 + .globl tsb_flush + .type tsb_flush,#function +tsb_flush: + sethi %hi(TSB_TAG_LOCK_HIGH), %g2 +1: TSB_LOAD_TAG(%o0, %g1) + srlx %g1, 32, %o3 + andcc %o3, %g2, %g0 + bne,pn %icc, 1b + membar #LoadLoad + cmp %g1, %o1 + mov 1, %o3 + bne,pt %xcc, 2f + sllx %o3, TSB_TAG_INVALID_BIT, %o3 + TSB_CAS_TAG(%o0, %g1, %o3) + cmp %g1, %o3 + bne,pn %xcc, 1b + nop +2: retl + TSB_MEMBAR + .size tsb_flush, .-tsb_flush + + /* Reload MMU related context switch state at + * schedule() time. + * + * %o0: page table physical address + * %o1: TSB register value + * %o2: TSB virtual address + * %o3: TSB mapping locked PTE + * %o4: Hypervisor TSB descriptor physical address + * + * We have to run this whole thing with interrupts + * disabled so that the current cpu doesn't change + * due to preemption. + */ + .align 32 + .globl __tsb_context_switch + .type __tsb_context_switch,#function +__tsb_context_switch: + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate + + ldub [%g6 + TI_CPU], %g1 + sethi %hi(trap_block), %g2 + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1 + or %g2, %lo(trap_block), %g2 + add %g2, %g1, %g2 + stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] + + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + nop + + /* Hypervisor TSB switch. */ + mov SCRATCHPAD_UTSBREG1, %g1 + stxa %o1, [%g1] ASI_SCRATCHPAD + mov -1, %g2 + mov SCRATCHPAD_UTSBREG2, %g1 + stxa %g2, [%g1] ASI_SCRATCHPAD + + /* Save away %o5's %pstate, we have to use %o5 for + * the hypervisor call. + */ + mov %o5, %g1 + + mov HV_FAST_MMU_TSB_CTXNON0, %o5 + mov 1, %o0 + mov %o4, %o1 + ta HV_FAST_TRAP + + /* Finish up and restore %o5. */ + ba,pt %xcc, 9f + mov %g1, %o5 + + /* SUN4U TSB switch. */ +1: mov TSB_REG, %g1 + stxa %o1, [%g1] ASI_DMMU + membar #Sync + stxa %o1, [%g1] ASI_IMMU + membar #Sync + +2: brz %o2, 9f + nop + + sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2 + mov TLB_TAG_ACCESS, %g1 + lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2 + stxa %o2, [%g1] ASI_DMMU + membar #Sync + sllx %g2, 3, %g2 + stxa %o3, [%g2] ASI_DTLB_DATA_ACCESS + membar #Sync +9: + wrpr %o5, %pstate + + retl + nop + .size __tsb_context_switch, .-__tsb_context_switch + +#define TSB_PASS_BITS ((1 << TSB_TAG_LOCK_BIT) | \ + (1 << TSB_TAG_INVALID_BIT)) + + .align 32 + .globl copy_tsb + .type copy_tsb,#function +copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size + * %o2=new_tsb_base, %o3=new_tsb_size + */ + sethi %uhi(TSB_PASS_BITS), %g7 + srlx %o3, 4, %o3 + add %o0, %o1, %g1 /* end of old tsb */ + sllx %g7, 32, %g7 + sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + +661: prefetcha [%o0] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o0] ASI_PHYS_USE_EC, #one_read + .previous + +90: andcc %o0, (64 - 1), %g0 + bne 1f + add %o0, 64, %o5 + +661: prefetcha [%o5] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o5] ASI_PHYS_USE_EC, #one_read + .previous + +1: TSB_LOAD_QUAD(%o0, %g2) /* %g2/%g3 == TSB entry */ + andcc %g2, %g7, %g0 /* LOCK or INVALID set? */ + bne,pn %xcc, 80f /* Skip it */ + sllx %g2, 22, %o4 /* TAG --> VADDR */ + + /* This can definitely be computed faster... */ + srlx %o0, 4, %o5 /* Build index */ + and %o5, 511, %o5 /* Mask index */ + sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + or %o4, %o5, %o4 /* Full VADDR. */ + srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ + sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ + TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ + add %o4, 0x8, %o4 /* Advance to TTE */ + TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ + +80: add %o0, 16, %o0 + cmp %o0, %g1 + bne,pt %xcc, 90b + nop + + retl + TSB_MEMBAR + .size copy_tsb, .-copy_tsb + + /* Set the invalid bit in all TSB entries. */ + .align 32 + .globl tsb_init + .type tsb_init,#function +tsb_init: /* %o0 = TSB vaddr, %o1 = size in bytes */ + prefetch [%o0 + 0x000], #n_writes + mov 1, %g1 + prefetch [%o0 + 0x040], #n_writes + sllx %g1, TSB_TAG_INVALID_BIT, %g1 + prefetch [%o0 + 0x080], #n_writes +1: prefetch [%o0 + 0x0c0], #n_writes + stx %g1, [%o0 + 0x00] + stx %g1, [%o0 + 0x10] + stx %g1, [%o0 + 0x20] + stx %g1, [%o0 + 0x30] + prefetch [%o0 + 0x100], #n_writes + stx %g1, [%o0 + 0x40] + stx %g1, [%o0 + 0x50] + stx %g1, [%o0 + 0x60] + stx %g1, [%o0 + 0x70] + prefetch [%o0 + 0x140], #n_writes + stx %g1, [%o0 + 0x80] + stx %g1, [%o0 + 0x90] + stx %g1, [%o0 + 0xa0] + stx %g1, [%o0 + 0xb0] + prefetch [%o0 + 0x180], #n_writes + stx %g1, [%o0 + 0xc0] + stx %g1, [%o0 + 0xd0] + stx %g1, [%o0 + 0xe0] + stx %g1, [%o0 + 0xf0] + subcc %o1, 0x100, %o1 + bne,pt %xcc, 1b + add %o0, 0x100, %o0 + retl + nop + nop + nop + .size tsb_init, .-tsb_init + + .globl NGtsb_init + .type NGtsb_init,#function +NGtsb_init: + rd %asi, %g2 + mov 1, %g1 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + sllx %g1, TSB_TAG_INVALID_BIT, %g1 +1: stxa %g1, [%o0 + 0x00] %asi + stxa %g1, [%o0 + 0x10] %asi + stxa %g1, [%o0 + 0x20] %asi + stxa %g1, [%o0 + 0x30] %asi + stxa %g1, [%o0 + 0x40] %asi + stxa %g1, [%o0 + 0x50] %asi + stxa %g1, [%o0 + 0x60] %asi + stxa %g1, [%o0 + 0x70] %asi + stxa %g1, [%o0 + 0x80] %asi + stxa %g1, [%o0 + 0x90] %asi + stxa %g1, [%o0 + 0xa0] %asi + stxa %g1, [%o0 + 0xb0] %asi + stxa %g1, [%o0 + 0xc0] %asi + stxa %g1, [%o0 + 0xd0] %asi + stxa %g1, [%o0 + 0xe0] %asi + stxa %g1, [%o0 + 0xf0] %asi + subcc %o1, 0x100, %o1 + bne,pt %xcc, 1b + add %o0, 0x100, %o0 + retl + wr %g2, 0x0, %asi + .size NGtsb_init, .-NGtsb_init diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 8365bc1..5d90151 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -1,7 +1,6 @@ -/* $Id: ttable.S,v 1.38 2002/02/09 19:49:30 davem Exp $ - * ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah extensions. +/* ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah/SUN4V extensions. * - * Copyright (C) 1996, 2001 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 2001, 2006 David S. Miller (davem@davemloft.net) */ #include <linux/config.h> @@ -19,7 +18,7 @@ tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3) tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7) tl0_iax: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception) -tl0_resv009: BTRAP(0x9) +tl0_itsb_4v: SUN4V_ITSB_MISS tl0_iae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf) @@ -38,7 +37,7 @@ tl0_div0: TRAP(do_div0) tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e) tl0_resv02f: BTRAP(0x2f) tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception) -tl0_resv031: BTRAP(0x31) +tl0_dtsb_4v: SUN4V_DTSB_MISS tl0_dae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl0_resv033: BTRAP(0x33) @@ -52,12 +51,13 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) +tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) #else tl0_irq1: BTRAP(0x41) tl0_irq2: BTRAP(0x42) tl0_irq3: BTRAP(0x43) +tl0_irq4: BTRAP(0x44) #endif -tl0_irq4: TRAP_IRQ(handler_irq, 4) tl0_irq5: TRAP_IRQ(handler_irq, 5) TRAP_IRQ(handler_irq, 6) tl0_irq7: TRAP_IRQ(handler_irq, 7) TRAP_IRQ(handler_irq, 8) tl0_irq9: TRAP_IRQ(handler_irq, 9) TRAP_IRQ(handler_irq, 10) @@ -78,9 +78,9 @@ tl0_vaw: TRAP(do_vaw) tl0_cee: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_cee_trap) tl0_iamiss: -#include "itlb_base.S" +#include "itlb_miss.S" tl0_damiss: -#include "dtlb_base.S" +#include "dtlb_miss.S" tl0_daprot: #include "dtlb_prot.S" tl0_fecc: BTRAP(0x70) /* Fast-ECC on Cheetah */ @@ -88,15 +88,18 @@ tl0_dcpe: BTRAP(0x71) /* D-cache Parity Error on Cheetah+ */ tl0_icpe: BTRAP(0x72) /* I-cache Parity Error on Cheetah+ */ tl0_resv073: BTRAP(0x73) BTRAP(0x74) BTRAP(0x75) tl0_resv076: BTRAP(0x76) BTRAP(0x77) BTRAP(0x78) BTRAP(0x79) BTRAP(0x7a) BTRAP(0x7b) -tl0_resv07c: BTRAP(0x7c) BTRAP(0x7d) BTRAP(0x7e) BTRAP(0x7f) +tl0_cpu_mondo: TRAP_NOSAVE(sun4v_cpu_mondo) +tl0_dev_mondo: TRAP_NOSAVE(sun4v_dev_mondo) +tl0_res_mondo: TRAP_NOSAVE(sun4v_res_mondo) +tl0_nres_mondo: TRAP_NOSAVE(sun4v_nonres_mondo) tl0_s0n: SPILL_0_NORMAL tl0_s1n: SPILL_1_NORMAL tl0_s2n: SPILL_2_NORMAL -tl0_s3n: SPILL_3_NORMAL -tl0_s4n: SPILL_4_NORMAL -tl0_s5n: SPILL_5_NORMAL -tl0_s6n: SPILL_6_NORMAL -tl0_s7n: SPILL_7_NORMAL +tl0_s3n: SPILL_0_NORMAL_ETRAP +tl0_s4n: SPILL_1_GENERIC_ETRAP +tl0_s5n: SPILL_1_GENERIC_ETRAP_FIXUP +tl0_s6n: SPILL_2_GENERIC_ETRAP +tl0_s7n: SPILL_2_GENERIC_ETRAP_FIXUP tl0_s0o: SPILL_0_OTHER tl0_s1o: SPILL_1_OTHER tl0_s2o: SPILL_2_OTHER @@ -110,9 +113,9 @@ tl0_f1n: FILL_1_NORMAL tl0_f2n: FILL_2_NORMAL tl0_f3n: FILL_3_NORMAL tl0_f4n: FILL_4_NORMAL -tl0_f5n: FILL_5_NORMAL -tl0_f6n: FILL_6_NORMAL -tl0_f7n: FILL_7_NORMAL +tl0_f5n: FILL_0_NORMAL_RTRAP +tl0_f6n: FILL_1_GENERIC_RTRAP +tl0_f7n: FILL_2_GENERIC_RTRAP tl0_f0o: FILL_0_OTHER tl0_f1o: FILL_1_OTHER tl0_f2o: FILL_2_OTHER @@ -128,7 +131,7 @@ tl0_flushw: FLUSH_WINDOW_TRAP tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107) .globl tl0_solaris tl0_solaris: SOLARIS_SYSCALL_TRAP -tl0_netbsd: NETBSD_SYSCALL_TRAP +tl0_resv109: BTRAP(0x109) tl0_resv10a: BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e) tl0_resv10f: BTRAP(0x10f) tl0_linux32: LINUX_32BIT_SYSCALL_TRAP @@ -179,7 +182,7 @@ sparc64_ttable_tl1: tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3) tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7) tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1) -tl1_resv009: BTRAPTL1(0x9) +tl1_itsb_4v: SUN4V_ITSB_MISS tl1_iae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf) @@ -198,7 +201,7 @@ tl1_div0: TRAPTL1(do_div0_tl1) tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c) tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f) tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1) -tl1_resv031: BTRAPTL1(0x31) +tl1_dtsb_4v: SUN4V_DTSB_MISS tl1_dae: membar #Sync TRAP_NOSAVE_7INSNS(__spitfire_access_error) tl1_resv033: BTRAPTL1(0x33) @@ -222,26 +225,10 @@ tl1_resv05c: BTRAPTL1(0x5c) BTRAPTL1(0x5d) BTRAPTL1(0x5e) BTRAPTL1(0x5f) tl1_ivec: TRAP_IVEC tl1_paw: TRAPTL1(do_paw_tl1) tl1_vaw: TRAPTL1(do_vaw_tl1) - - /* The grotty trick to save %g1 into current->thread.cee_stuff - * is because when we take this trap we could be interrupting - * trap code already using the trap alternate global registers. - * - * We cross our fingers and pray that this store/load does - * not cause yet another CEE trap. - */ -tl1_cee: membar #Sync - stx %g1, [%g6 + TI_CEE_STUFF] - ldxa [%g0] ASI_AFSR, %g1 - membar #Sync - stxa %g1, [%g0] ASI_AFSR - membar #Sync - ldx [%g6 + TI_CEE_STUFF], %g1 - retry - +tl1_cee: BTRAPTL1(0x63) tl1_iamiss: BTRAPTL1(0x64) BTRAPTL1(0x65) BTRAPTL1(0x66) BTRAPTL1(0x67) tl1_damiss: -#include "dtlb_backend.S" +#include "dtlb_miss.S" tl1_daprot: #include "dtlb_prot.S" tl1_fecc: BTRAPTL1(0x70) /* Fast-ECC on Cheetah */ diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 70faf63..001e851 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -277,7 +277,7 @@ static void kernel_mna_trap_fault(void) regs->tstate |= (ASI_AIUS << 24UL); } -asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, unsigned long sfar, unsigned long sfsr) +asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) { enum direction dir = decode_direction(insn); int size = decode_access_size(insn); @@ -405,6 +405,9 @@ extern void do_privact(struct pt_regs *regs); extern void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar); +extern void sun4v_data_access_exception(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); int handle_ldf_stq(u32 insn, struct pt_regs *regs) { @@ -447,14 +450,20 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) break; } default: - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } if (put_user (first >> 32, (u32 __user *)addr) || __put_user ((u32)first, (u32 __user *)(addr + 4)) || __put_user (second >> 32, (u32 __user *)(addr + 8)) || __put_user ((u32)second, (u32 __user *)(addr + 12))) { - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } } else { @@ -467,7 +476,10 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) do_privact(regs); return 1; } else if (asi > ASI_SNFL) { - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } switch (insn & 0x180000) { @@ -484,7 +496,10 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) err |= __get_user (data[i], (u32 __user *)(addr + 4*i)); } if (err && !(asi & 0x2 /* NF */)) { - spitfire_data_access_exception(regs, 0, addr); + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); return 1; } if (asi & 0x8) /* Little */ { @@ -548,7 +563,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr u32 insn; u32 first, second; u64 value; - u8 asi, freg; + u8 freg; int flag; struct fpustate *f = FPUSTATE; @@ -557,7 +572,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { - asi = sfsr >> 16; + int asi = decode_asi(insn, regs); if ((asi > ASI_SNFL) || (asi < ASI_P)) goto daex; @@ -587,7 +602,11 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr *(u64 *)(f->regs + freg) = value; current_thread_info()->fpsaved[0] |= flag; } else { -daex: spitfire_data_access_exception(regs, sfsr, sfar); +daex: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, sfar, sfsr); + else + spitfire_data_access_exception(regs, sfsr, sfar); return; } advance(regs); @@ -600,7 +619,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr unsigned long tstate = regs->tstate; u32 insn; u64 value; - u8 asi, freg; + u8 freg; int flag; struct fpustate *f = FPUSTATE; @@ -609,8 +628,8 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { + int asi = decode_asi(insn, regs); freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); - asi = sfsr >> 16; value = 0; flag = (freg < 32) ? FPRS_DL : FPRS_DU; if ((asi > ASI_SNFL) || @@ -631,7 +650,11 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr __put_user ((u32)value, (u32 __user *)(sfar + 4))) goto daex; } else { -daex: spitfire_data_access_exception(regs, sfsr, sfar); +daex: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, sfar, sfsr); + else + spitfire_data_access_exception(regs, sfsr, sfar); return; } advance(regs); diff --git a/arch/sparc64/kernel/us2e_cpufreq.c b/arch/sparc64/kernel/us2e_cpufreq.c index b35dc8d..1f83fe6 100644 --- a/arch/sparc64/kernel/us2e_cpufreq.c +++ b/arch/sparc64/kernel/us2e_cpufreq.c @@ -346,6 +346,9 @@ static int __init us2e_freq_init(void) unsigned long manuf, impl, ver; int ret; + if (tlb_type != spitfire) + return -ENODEV; + __asm__("rdpr %%ver, %0" : "=r" (ver)); manuf = ((ver >> 48) & 0xffff); impl = ((ver >> 32) & 0xffff); @@ -354,20 +357,16 @@ static int __init us2e_freq_init(void) struct cpufreq_driver *driver; ret = -ENOMEM; - driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); + driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); if (!driver) goto err_out; - memset(driver, 0, sizeof(*driver)); - us2e_freq_table = kmalloc( + us2e_freq_table = kzalloc( (NR_CPUS * sizeof(struct us2e_freq_percpu_info)), GFP_KERNEL); if (!us2e_freq_table) goto err_out; - memset(us2e_freq_table, 0, - (NR_CPUS * sizeof(struct us2e_freq_percpu_info))); - driver->init = us2e_freq_cpu_init; driver->verify = us2e_freq_verify; driver->target = us2e_freq_target; diff --git a/arch/sparc64/kernel/us3_cpufreq.c b/arch/sparc64/kernel/us3_cpufreq.c index 6d1f9a3..47e3aca 100644 --- a/arch/sparc64/kernel/us3_cpufreq.c +++ b/arch/sparc64/kernel/us3_cpufreq.c @@ -203,6 +203,9 @@ static int __init us3_freq_init(void) unsigned long manuf, impl, ver; int ret; + if (tlb_type != cheetah && tlb_type != cheetah_plus) + return -ENODEV; + __asm__("rdpr %%ver, %0" : "=r" (ver)); manuf = ((ver >> 48) & 0xffff); impl = ((ver >> 32) & 0xffff); @@ -215,20 +218,16 @@ static int __init us3_freq_init(void) struct cpufreq_driver *driver; ret = -ENOMEM; - driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); + driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); if (!driver) goto err_out; - memset(driver, 0, sizeof(*driver)); - us3_freq_table = kmalloc( + us3_freq_table = kzalloc( (NR_CPUS * sizeof(struct us3_freq_percpu_info)), GFP_KERNEL); if (!us3_freq_table) goto err_out; - memset(us3_freq_table, 0, - (NR_CPUS * sizeof(struct us3_freq_percpu_info))); - driver->init = us3_freq_cpu_init; driver->verify = us3_freq_verify; driver->target = us3_freq_target; diff --git a/arch/sparc64/kernel/visemul.c b/arch/sparc64/kernel/visemul.c new file mode 100644 index 0000000..84fedaa --- /dev/null +++ b/arch/sparc64/kernel/visemul.c @@ -0,0 +1,894 @@ +/* visemul.c: Emulation of VIS instructions. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/thread_info.h> + +#include <asm/ptrace.h> +#include <asm/pstate.h> +#include <asm/system.h> +#include <asm/fpumacro.h> +#include <asm/uaccess.h> + +/* OPF field of various VIS instructions. */ + +/* 000111011 - four 16-bit packs */ +#define FPACK16_OPF 0x03b + +/* 000111010 - two 32-bit packs */ +#define FPACK32_OPF 0x03a + +/* 000111101 - four 16-bit packs */ +#define FPACKFIX_OPF 0x03d + +/* 001001101 - four 16-bit expands */ +#define FEXPAND_OPF 0x04d + +/* 001001011 - two 32-bit merges */ +#define FPMERGE_OPF 0x04b + +/* 000110001 - 8-by-16-bit partitoned product */ +#define FMUL8x16_OPF 0x031 + +/* 000110011 - 8-by-16-bit upper alpha partitioned product */ +#define FMUL8x16AU_OPF 0x033 + +/* 000110101 - 8-by-16-bit lower alpha partitioned product */ +#define FMUL8x16AL_OPF 0x035 + +/* 000110110 - upper 8-by-16-bit partitioned product */ +#define FMUL8SUx16_OPF 0x036 + +/* 000110111 - lower 8-by-16-bit partitioned product */ +#define FMUL8ULx16_OPF 0x037 + +/* 000111000 - upper 8-by-16-bit partitioned product */ +#define FMULD8SUx16_OPF 0x038 + +/* 000111001 - lower unsigned 8-by-16-bit partitioned product */ +#define FMULD8ULx16_OPF 0x039 + +/* 000101000 - four 16-bit compare; set rd if src1 > src2 */ +#define FCMPGT16_OPF 0x028 + +/* 000101100 - two 32-bit compare; set rd if src1 > src2 */ +#define FCMPGT32_OPF 0x02c + +/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ +#define FCMPLE16_OPF 0x020 + +/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ +#define FCMPLE32_OPF 0x024 + +/* 000100010 - four 16-bit compare; set rd if src1 != src2 */ +#define FCMPNE16_OPF 0x022 + +/* 000100110 - two 32-bit compare; set rd if src1 != src2 */ +#define FCMPNE32_OPF 0x026 + +/* 000101010 - four 16-bit compare; set rd if src1 == src2 */ +#define FCMPEQ16_OPF 0x02a + +/* 000101110 - two 32-bit compare; set rd if src1 == src2 */ +#define FCMPEQ32_OPF 0x02e + +/* 000000000 - Eight 8-bit edge boundary processing */ +#define EDGE8_OPF 0x000 + +/* 000000001 - Eight 8-bit edge boundary processing, no CC */ +#define EDGE8N_OPF 0x001 + +/* 000000010 - Eight 8-bit edge boundary processing, little-endian */ +#define EDGE8L_OPF 0x002 + +/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ +#define EDGE8LN_OPF 0x003 + +/* 000000100 - Four 16-bit edge boundary processing */ +#define EDGE16_OPF 0x004 + +/* 000000101 - Four 16-bit edge boundary processing, no CC */ +#define EDGE16N_OPF 0x005 + +/* 000000110 - Four 16-bit edge boundary processing, little-endian */ +#define EDGE16L_OPF 0x006 + +/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ +#define EDGE16LN_OPF 0x007 + +/* 000001000 - Two 32-bit edge boundary processing */ +#define EDGE32_OPF 0x008 + +/* 000001001 - Two 32-bit edge boundary processing, no CC */ +#define EDGE32N_OPF 0x009 + +/* 000001010 - Two 32-bit edge boundary processing, little-endian */ +#define EDGE32L_OPF 0x00a + +/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ +#define EDGE32LN_OPF 0x00b + +/* 000111110 - distance between 8 8-bit components */ +#define PDIST_OPF 0x03e + +/* 000010000 - convert 8-bit 3-D address to blocked byte address */ +#define ARRAY8_OPF 0x010 + +/* 000010010 - convert 16-bit 3-D address to blocked byte address */ +#define ARRAY16_OPF 0x012 + +/* 000010100 - convert 32-bit 3-D address to blocked byte address */ +#define ARRAY32_OPF 0x014 + +/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ +#define BMASK_OPF 0x019 + +/* 001001100 - Permute bytes as specified by GSR.MASK */ +#define BSHUFFLE_OPF 0x04c + +#define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19)) +#define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19)) + +#define VIS_OPF_SHIFT 5 +#define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) + +#define RS1(INSN) (((INSN) >> 24) & 0x1f) +#define RS2(INSN) (((INSN) >> 0) & 0x1f) +#define RD(INSN) (((INSN) >> 25) & 0x1f) + +static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, + unsigned int rd, int from_kernel) +{ + if (rs2 >= 16 || rs1 >= 16 || rd >= 16) { + if (from_kernel != 0) + __asm__ __volatile__("flushw"); + else + flushw_user(); + } +} + +static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) +{ + unsigned long value; + + if (reg < 16) + return (!reg ? 0 : regs->u_regs[reg]); + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *win; + win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); + value = win->locals[reg - 16]; + } else if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + get_user(value, &win32->locals[reg - 16]); + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + get_user(value, &win->locals[reg - 16]); + } + return value; +} + +static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, + struct pt_regs *regs) +{ + BUG_ON(reg < 16); + BUG_ON(regs->tstate & TSTATE_PRIV); + + if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + return (unsigned long __user *)&win32->locals[reg - 16]; + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + return &win->locals[reg - 16]; + } +} + +static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, + struct pt_regs *regs) +{ + BUG_ON(reg >= 16); + BUG_ON(regs->tstate & TSTATE_PRIV); + + return ®s->u_regs[reg]; +} + +static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) +{ + if (rd < 16) { + unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); + + *rd_kern = val; + } else { + unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); + + if (test_thread_flag(TIF_32BIT)) + __put_user((u32)val, (u32 __user *)rd_user); + else + __put_user(val, rd_user); + } +} + +static inline unsigned long fpd_regval(struct fpustate *f, + unsigned int insn_regnum) +{ + insn_regnum = (((insn_regnum & 1) << 5) | + (insn_regnum & 0x1e)); + + return *(unsigned long *) &f->regs[insn_regnum]; +} + +static inline unsigned long *fpd_regaddr(struct fpustate *f, + unsigned int insn_regnum) +{ + insn_regnum = (((insn_regnum & 1) << 5) | + (insn_regnum & 0x1e)); + + return (unsigned long *) &f->regs[insn_regnum]; +} + +static inline unsigned int fps_regval(struct fpustate *f, + unsigned int insn_regnum) +{ + return f->regs[insn_regnum]; +} + +static inline unsigned int *fps_regaddr(struct fpustate *f, + unsigned int insn_regnum) +{ + return &f->regs[insn_regnum]; +} + +struct edge_tab { + u16 left, right; +}; +struct edge_tab edge8_tab[8] = { + { 0xff, 0x80 }, + { 0x7f, 0xc0 }, + { 0x3f, 0xe0 }, + { 0x1f, 0xf0 }, + { 0x0f, 0xf8 }, + { 0x07, 0xfc }, + { 0x03, 0xfe }, + { 0x01, 0xff }, +}; +struct edge_tab edge8_tab_l[8] = { + { 0xff, 0x01 }, + { 0xfe, 0x03 }, + { 0xfc, 0x07 }, + { 0xf8, 0x0f }, + { 0xf0, 0x1f }, + { 0xe0, 0x3f }, + { 0xc0, 0x7f }, + { 0x80, 0xff }, +}; +struct edge_tab edge16_tab[4] = { + { 0xf, 0x8 }, + { 0x7, 0xc }, + { 0x3, 0xe }, + { 0x1, 0xf }, +}; +struct edge_tab edge16_tab_l[4] = { + { 0xf, 0x1 }, + { 0xe, 0x3 }, + { 0xc, 0x7 }, + { 0x8, 0xf }, +}; +struct edge_tab edge32_tab[2] = { + { 0x3, 0x2 }, + { 0x1, 0x3 }, +}; +struct edge_tab edge32_tab_l[2] = { + { 0x3, 0x1 }, + { 0x2, 0x3 }, +}; + +static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; + u16 left, right; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); + orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); + + if (test_thread_flag(TIF_32BIT)) { + rs1 = rs1 & 0xffffffff; + rs2 = rs2 & 0xffffffff; + } + switch (opf) { + default: + case EDGE8_OPF: + case EDGE8N_OPF: + left = edge8_tab[rs1 & 0x7].left; + right = edge8_tab[rs2 & 0x7].right; + break; + case EDGE8L_OPF: + case EDGE8LN_OPF: + left = edge8_tab_l[rs1 & 0x7].left; + right = edge8_tab_l[rs2 & 0x7].right; + break; + + case EDGE16_OPF: + case EDGE16N_OPF: + left = edge16_tab[(rs1 >> 1) & 0x3].left; + right = edge16_tab[(rs2 >> 1) & 0x3].right; + break; + + case EDGE16L_OPF: + case EDGE16LN_OPF: + left = edge16_tab_l[(rs1 >> 1) & 0x3].left; + right = edge16_tab_l[(rs2 >> 1) & 0x3].right; + break; + + case EDGE32_OPF: + case EDGE32N_OPF: + left = edge32_tab[(rs1 >> 2) & 0x1].left; + right = edge32_tab[(rs2 >> 2) & 0x1].right; + break; + + case EDGE32L_OPF: + case EDGE32LN_OPF: + left = edge32_tab_l[(rs1 >> 2) & 0x1].left; + right = edge32_tab_l[(rs2 >> 2) & 0x1].right; + break; + }; + + if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) + rd_val = right & left; + else + rd_val = left; + + store_reg(regs, rd_val, RD(insn)); + + switch (opf) { + case EDGE8_OPF: + case EDGE8L_OPF: + case EDGE16_OPF: + case EDGE16L_OPF: + case EDGE32_OPF: + case EDGE32L_OPF: { + unsigned long ccr, tstate; + + __asm__ __volatile__("subcc %1, %2, %%g0\n\t" + "rd %%ccr, %0" + : "=r" (ccr) + : "r" (orig_rs1), "r" (orig_rs2) + : "cc"); + tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); + regs->tstate = tstate | (ccr << 32UL); + } + }; +} + +static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + unsigned long rs1, rs2, rd_val; + unsigned int bits, bits_mask; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + rs1 = fetch_reg(RS1(insn), regs); + rs2 = fetch_reg(RS2(insn), regs); + + bits = (rs2 > 5 ? 5 : rs2); + bits_mask = (1UL << bits) - 1UL; + + rd_val = ((((rs1 >> 11) & 0x3) << 0) | + (((rs1 >> 33) & 0x3) << 2) | + (((rs1 >> 55) & 0x1) << 4) | + (((rs1 >> 13) & 0xf) << 5) | + (((rs1 >> 35) & 0xf) << 9) | + (((rs1 >> 56) & 0xf) << 13) | + (((rs1 >> 17) & bits_mask) << 17) | + (((rs1 >> 39) & bits_mask) << (17 + bits)) | + (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); + + switch (opf) { + case ARRAY16_OPF: + rd_val <<= 1; + break; + + case ARRAY32_OPF: + rd_val <<= 2; + }; + + store_reg(regs, rd_val, RD(insn)); +} + +static void bmask(struct pt_regs *regs, unsigned int insn) +{ + unsigned long rs1, rs2, rd_val, gsr; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + rs1 = fetch_reg(RS1(insn), regs); + rs2 = fetch_reg(RS2(insn), regs); + rd_val = rs1 + rs2; + + store_reg(regs, rd_val, RD(insn)); + + gsr = current_thread_info()->gsr[0] & 0xffffffff; + gsr |= rd_val << 32UL; + current_thread_info()->gsr[0] = gsr; +} + +static void bshuffle(struct pt_regs *regs, unsigned int insn) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val; + unsigned long bmask, i; + + bmask = current_thread_info()->gsr[0] >> 32UL; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0UL; + for (i = 0; i < 8; i++) { + unsigned long which = (bmask >> (i * 4)) & 0xf; + unsigned long byte; + + if (which < 8) + byte = (rs1 >> (which * 8)) & 0xff; + else + byte = (rs2 >> ((which-8)*8)) & 0xff; + rd_val |= (byte << (i * 8)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; +} + +static void pdist(struct pt_regs *regs, unsigned int insn) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, *rd, rd_val; + unsigned long i; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS1(insn)); + rd = fpd_regaddr(f, RD(insn)); + + rd_val = *rd; + + for (i = 0; i < 8; i++) { + s16 s1, s2; + + s1 = (rs1 >> (56 - (i * 8))) & 0xff; + s2 = (rs2 >> (56 - (i * 8))) & 0xff; + + /* Absolute value of difference. */ + s1 -= s2; + if (s1 < 0) + s1 = ~s1 + 1; + + rd_val += s1; + } + + *rd = rd_val; +} + +static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, gsr, scale, rd_val; + + gsr = current_thread_info()->gsr[0]; + scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); + switch (opf) { + case FPACK16_OPF: { + unsigned long byte; + + rs2 = fpd_regval(f, RS2(insn)); + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + unsigned int val; + s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; + int scaled = src << scale; + int from_fixed = scaled >> 7; + + val = ((from_fixed < 0) ? + 0 : + (from_fixed > 255) ? + 255 : from_fixed); + + rd_val |= (val << (8 * byte)); + } + *fps_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPACK32_OPF: { + unsigned long word; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); + for (word = 0; word < 2; word++) { + unsigned long val; + s32 src = (rs2 >> (word * 32UL)); + s64 scaled = src << scale; + s64 from_fixed = scaled >> 23; + + val = ((from_fixed < 0) ? + 0 : + (from_fixed > 255) ? + 255 : from_fixed); + + rd_val |= (val << (32 * word)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPACKFIX_OPF: { + unsigned long word; + + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + for (word = 0; word < 2; word++) { + long val; + s32 src = (rs2 >> (word * 32UL)); + s64 scaled = src << scale; + s64 from_fixed = scaled >> 16; + + val = ((from_fixed < -32768) ? + -32768 : + (from_fixed > 32767) ? + 32767 : from_fixed); + + rd_val |= ((val & 0xffff) << (word * 16)); + } + *fps_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FEXPAND_OPF: { + unsigned long byte; + + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + unsigned long val; + u8 src = (rs2 >> (byte * 8)) & 0xff; + + val = src << 4; + + rd_val |= (val << (byte * 16)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPMERGE_OPF: { + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = (((rs2 & 0x000000ff) << 0) | + ((rs1 & 0x000000ff) << 8) | + ((rs2 & 0x0000ff00) << 8) | + ((rs1 & 0x0000ff00) << 16) | + ((rs2 & 0x00ff0000) << 16) | + ((rs1 & 0x00ff0000) << 24) | + ((rs2 & 0xff000000) << 24) | + ((rs1 & 0xff000000) << 32)); + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + }; +} + +static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val; + + switch (opf) { + case FMUL8x16_OPF: { + unsigned long byte; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; + s16 src2 = (rs2 >> (byte * 16)) & 0xffff; + u32 prod = src1 * src2; + u16 scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMUL8x16AU_OPF: + case FMUL8x16AL_OPF: { + unsigned long byte; + s16 src2; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0); + for (byte = 0; byte < 4; byte++) { + u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; + u32 prod = src1 * src2; + u16 scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMUL8SUx16_OPF: + case FMUL8ULx16_OPF: { + unsigned long byte, ushift; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; + for (byte = 0; byte < 4; byte++) { + u16 src1; + s16 src2; + u32 prod; + u16 scaled; + + src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); + src2 = ((rs2 >> (16 * byte)) & 0xffff); + prod = src1 * src2; + scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMULD8SUx16_OPF: + case FMULD8ULx16_OPF: { + unsigned long byte, ushift; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; + for (byte = 0; byte < 2; byte++) { + u16 src1; + s16 src2; + u32 prod; + u16 scaled; + + src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); + src2 = ((rs2 >> (16 * byte)) & 0xffff); + prod = src1 * src2; + scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << + ((byte * 32UL) + 7UL)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + }; +} + +static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val, i; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + + switch (opf) { + case FCMPGT16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a > b) + rd_val |= 1 << i; + } + break; + + case FCMPGT32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a > b) + rd_val |= 1 << i; + } + break; + + case FCMPLE16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a <= b) + rd_val |= 1 << i; + } + break; + + case FCMPLE32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a <= b) + rd_val |= 1 << i; + } + break; + + case FCMPNE16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a != b) + rd_val |= 1 << i; + } + break; + + case FCMPNE32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a != b) + rd_val |= 1 << i; + } + break; + + case FCMPEQ16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a == b) + rd_val |= 1 << i; + } + break; + + case FCMPEQ32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a == b) + rd_val |= 1 << i; + } + break; + }; + + maybe_flush_windows(0, 0, RD(insn), 0); + store_reg(regs, rd_val, RD(insn)); +} + +/* Emulate the VIS instructions which are not implemented in + * hardware on Niagara. + */ +int vis_emul(struct pt_regs *regs, unsigned int insn) +{ + unsigned long pc = regs->tpc; + unsigned int opf; + + BUG_ON(regs->tstate & TSTATE_PRIV); + + if (test_thread_flag(TIF_32BIT)) + pc = (u32)pc; + + if (get_user(insn, (u32 __user *) pc)) + return -EFAULT; + + if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL) + return -EINVAL; + + opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; + switch (opf) { + default: + return -EINVAL; + + /* Pixel Formatting Instructions. */ + case FPACK16_OPF: + case FPACK32_OPF: + case FPACKFIX_OPF: + case FEXPAND_OPF: + case FPMERGE_OPF: + pformat(regs, insn, opf); + break; + + /* Partitioned Multiply Instructions */ + case FMUL8x16_OPF: + case FMUL8x16AU_OPF: + case FMUL8x16AL_OPF: + case FMUL8SUx16_OPF: + case FMUL8ULx16_OPF: + case FMULD8SUx16_OPF: + case FMULD8ULx16_OPF: + pmul(regs, insn, opf); + break; + + /* Pixel Compare Instructions */ + case FCMPGT16_OPF: + case FCMPGT32_OPF: + case FCMPLE16_OPF: + case FCMPLE32_OPF: + case FCMPNE16_OPF: + case FCMPNE32_OPF: + case FCMPEQ16_OPF: + case FCMPEQ32_OPF: + pcmp(regs, insn, opf); + break; + + /* Edge Handling Instructions */ + case EDGE8_OPF: + case EDGE8N_OPF: + case EDGE8L_OPF: + case EDGE8LN_OPF: + case EDGE16_OPF: + case EDGE16N_OPF: + case EDGE16L_OPF: + case EDGE16LN_OPF: + case EDGE32_OPF: + case EDGE32N_OPF: + case EDGE32L_OPF: + case EDGE32LN_OPF: + edge(regs, insn, opf); + break; + + /* Pixel Component Distance */ + case PDIST_OPF: + pdist(regs, insn); + break; + + /* Three-Dimensional Array Addressing Instructions */ + case ARRAY8_OPF: + case ARRAY16_OPF: + case ARRAY32_OPF: + array(regs, insn, opf); + break; + + /* Byte Mask and Shuffle Instructions */ + case BMASK_OPF: + bmask(regs, insn); + break; + + case BSHUFFLE_OPF: + bshuffle(regs, insn); + break; + }; + + regs->tpc = regs->tnpc; + regs->tnpc += 4; + return 0; +} diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 467d13a..b097379 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -70,6 +70,22 @@ SECTIONS .con_initcall.init : { *(.con_initcall.init) } __con_initcall_end = .; SECURITY_INIT + . = ALIGN(4); + __tsb_ldquad_phys_patch = .; + .tsb_ldquad_phys_patch : { *(.tsb_ldquad_phys_patch) } + __tsb_ldquad_phys_patch_end = .; + __tsb_phys_patch = .; + .tsb_phys_patch : { *(.tsb_phys_patch) } + __tsb_phys_patch_end = .; + __cpuid_patch = .; + .cpuid_patch : { *(.cpuid_patch) } + __cpuid_patch_end = .; + __sun4v_1insn_patch = .; + .sun4v_1insn_patch : { *(.sun4v_1insn_patch) } + __sun4v_1insn_patch_end = .; + __sun4v_2insn_patch = .; + .sun4v_2insn_patch : { *(.sun4v_2insn_patch) } + __sun4v_2insn_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index 3916092..c4aa110 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -1,8 +1,6 @@ -/* $Id: winfixup.S,v 1.30 2002/02/09 19:49:30 davem Exp $ +/* winfixup.S: Handle cases where user stack pointer is found to be bogus. * - * winfixup.S: Handle cases where user stack pointer is found to be bogus. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 2006 David S. Miller (davem@davemloft.net) */ #include <asm/asi.h> @@ -15,374 +13,144 @@ .text -set_pcontext: - sethi %hi(sparc64_kern_pri_context), %l1 - ldx [%l1 + %lo(sparc64_kern_pri_context)], %l1 - mov PRIMARY_CONTEXT, %g1 - stxa %l1, [%g1] ASI_DMMU - flush %g6 - retl - nop + /* It used to be the case that these register window fault + * handlers could run via the save and restore instructions + * done by the trap entry and exit code. They now do the + * window spill/fill by hand, so that case no longer can occur. + */ .align 32 - - /* Here are the rules, pay attention. - * - * The kernel is disallowed from touching user space while - * the trap level is greater than zero, except for from within - * the window spill/fill handlers. This must be followed - * so that we can easily detect the case where we tried to - * spill/fill with a bogus (or unmapped) user stack pointer. - * - * These are layed out in a special way for cache reasons, - * don't touch... - */ - .globl fill_fixup, spill_fixup fill_fixup: - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 - or %g4, FAULT_CODE_WINFIXUP, %g4 - be,pt %xcc, window_scheisse_from_user_common - and %g1, TSTATE_CWP, %g1 - - /* This is the extremely complex case, but it does happen from - * time to time if things are just right. Essentially the restore - * done in rtrap right before going back to user mode, with tl=1 - * and that levels trap stack registers all setup, took a fill trap, - * the user stack was not mapped in the tlb, and tlb miss occurred, - * the pte found was not valid, and a simple ref bit watch update - * could not satisfy the miss, so we got here. - * - * We must carefully unwind the state so we get back to tl=0, preserve - * all the register values we were going to give to the user. Luckily - * most things are where they need to be, we also have the address - * which triggered the fault handy as well. - * - * Also note that we must preserve %l5 and %l6. If the user was - * returning from a system call, we must make it look this way - * after we process the fill fault on the users stack. - * - * First, get into the window where the original restore was executed. - */ - - rdpr %wstate, %g2 ! Grab user mode wstate. - wrpr %g1, %cwp ! Get into the right window. - sll %g2, 3, %g2 ! NORMAL-->OTHER - - wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistent. - wrpr %g0, 0x0, %otherwin ! We know this. - call set_pcontext ! Change contexts... + TRAP_LOAD_THREAD_REG(%g6, %g1) + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap_clr_l6 nop - rdpr %pstate, %l1 ! Prepare to change globals. - mov %g6, %o7 ! Get current. - - andn %l1, PSTATE_MM, %l1 ! We want to be in RMO - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] - wrpr %g0, 0x0, %tl ! Out of trap levels. - wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate - mov %o7, %g6 - ldx [%g6 + TI_TASK], %g4 -#ifdef CONFIG_SMP - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif - /* This is the same as below, except we handle this a bit special - * since we must preserve %l5 and %l6, see comment above. - */ - call do_sparc64_fault - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop ! yes, nop is correct - - /* Be very careful about usage of the alternate globals here. - * You cannot touch %g4/%g5 as that has the fault information - * should this be from usermode. Also be careful for the case - * where we get here from the save instruction in etrap.S when - * coming from either user or kernel (does not matter which, it - * is the same problem in both cases). Essentially this means - * do not touch %g7 or %g2 so we handle the two cases fine. + /* Be very careful about usage of the trap globals here. + * You cannot touch %g5 as that has the fault information. */ spill_fixup: - ldx [%g6 + TI_FLAGS], %g1 - andcc %g1, _TIF_32BIT, %g0 - ldub [%g6 + TI_WSAVED], %g1 - - sll %g1, 3, %g3 - add %g6, %g3, %g3 - stx %sp, [%g3 + TI_RWIN_SPTRS] - sll %g1, 7, %g3 - bne,pt %xcc, 1f - add %g6, %g3, %g3 - stx %l0, [%g3 + TI_REG_WINDOW + 0x00] - stx %l1, [%g3 + TI_REG_WINDOW + 0x08] - - stx %l2, [%g3 + TI_REG_WINDOW + 0x10] - stx %l3, [%g3 + TI_REG_WINDOW + 0x18] - stx %l4, [%g3 + TI_REG_WINDOW + 0x20] - stx %l5, [%g3 + TI_REG_WINDOW + 0x28] - stx %l6, [%g3 + TI_REG_WINDOW + 0x30] - stx %l7, [%g3 + TI_REG_WINDOW + 0x38] - stx %i0, [%g3 + TI_REG_WINDOW + 0x40] - stx %i1, [%g3 + TI_REG_WINDOW + 0x48] - - stx %i2, [%g3 + TI_REG_WINDOW + 0x50] - stx %i3, [%g3 + TI_REG_WINDOW + 0x58] - stx %i4, [%g3 + TI_REG_WINDOW + 0x60] - stx %i5, [%g3 + TI_REG_WINDOW + 0x68] - stx %i6, [%g3 + TI_REG_WINDOW + 0x70] - b,pt %xcc, 2f - stx %i7, [%g3 + TI_REG_WINDOW + 0x78] -1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00] - - stw %l1, [%g3 + TI_REG_WINDOW + 0x04] - stw %l2, [%g3 + TI_REG_WINDOW + 0x08] - stw %l3, [%g3 + TI_REG_WINDOW + 0x0c] - stw %l4, [%g3 + TI_REG_WINDOW + 0x10] - stw %l5, [%g3 + TI_REG_WINDOW + 0x14] - stw %l6, [%g3 + TI_REG_WINDOW + 0x18] - stw %l7, [%g3 + TI_REG_WINDOW + 0x1c] - stw %i0, [%g3 + TI_REG_WINDOW + 0x20] - - stw %i1, [%g3 + TI_REG_WINDOW + 0x24] - stw %i2, [%g3 + TI_REG_WINDOW + 0x28] - stw %i3, [%g3 + TI_REG_WINDOW + 0x2c] - stw %i4, [%g3 + TI_REG_WINDOW + 0x30] - stw %i5, [%g3 + TI_REG_WINDOW + 0x34] - stw %i6, [%g3 + TI_REG_WINDOW + 0x38] - stw %i7, [%g3 + TI_REG_WINDOW + 0x3c] -2: add %g1, 1, %g1 - - stb %g1, [%g6 + TI_WSAVED] - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 +spill_fixup_mna: +spill_fixup_dax: + TRAP_LOAD_THREAD_REG(%g6, %g1) + ldx [%g6 + TI_FLAGS], %g1 + andcc %g1, _TIF_32BIT, %g0 + ldub [%g6 + TI_WSAVED], %g1 + sll %g1, 3, %g3 + add %g6, %g3, %g3 + stx %sp, [%g3 + TI_RWIN_SPTRS] + sll %g1, 7, %g3 + bne,pt %xcc, 1f + add %g6, %g3, %g3 + stx %l0, [%g3 + TI_REG_WINDOW + 0x00] + stx %l1, [%g3 + TI_REG_WINDOW + 0x08] + stx %l2, [%g3 + TI_REG_WINDOW + 0x10] + stx %l3, [%g3 + TI_REG_WINDOW + 0x18] + stx %l4, [%g3 + TI_REG_WINDOW + 0x20] + stx %l5, [%g3 + TI_REG_WINDOW + 0x28] + stx %l6, [%g3 + TI_REG_WINDOW + 0x30] + stx %l7, [%g3 + TI_REG_WINDOW + 0x38] + stx %i0, [%g3 + TI_REG_WINDOW + 0x40] + stx %i1, [%g3 + TI_REG_WINDOW + 0x48] + stx %i2, [%g3 + TI_REG_WINDOW + 0x50] + stx %i3, [%g3 + TI_REG_WINDOW + 0x58] + stx %i4, [%g3 + TI_REG_WINDOW + 0x60] + stx %i5, [%g3 + TI_REG_WINDOW + 0x68] + stx %i6, [%g3 + TI_REG_WINDOW + 0x70] + ba,pt %xcc, 2f + stx %i7, [%g3 + TI_REG_WINDOW + 0x78] +1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00] + stw %l1, [%g3 + TI_REG_WINDOW + 0x04] + stw %l2, [%g3 + TI_REG_WINDOW + 0x08] + stw %l3, [%g3 + TI_REG_WINDOW + 0x0c] + stw %l4, [%g3 + TI_REG_WINDOW + 0x10] + stw %l5, [%g3 + TI_REG_WINDOW + 0x14] + stw %l6, [%g3 + TI_REG_WINDOW + 0x18] + stw %l7, [%g3 + TI_REG_WINDOW + 0x1c] + stw %i0, [%g3 + TI_REG_WINDOW + 0x20] + stw %i1, [%g3 + TI_REG_WINDOW + 0x24] + stw %i2, [%g3 + TI_REG_WINDOW + 0x28] + stw %i3, [%g3 + TI_REG_WINDOW + 0x2c] + stw %i4, [%g3 + TI_REG_WINDOW + 0x30] + stw %i5, [%g3 + TI_REG_WINDOW + 0x34] + stw %i6, [%g3 + TI_REG_WINDOW + 0x38] + stw %i7, [%g3 + TI_REG_WINDOW + 0x3c] +2: add %g1, 1, %g1 + stb %g1, [%g6 + TI_WSAVED] + rdpr %tstate, %g1 + andcc %g1, TSTATE_PRIV, %g0 saved - and %g1, TSTATE_CWP, %g1 - be,pn %xcc, window_scheisse_from_user_common - mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4 + be,pn %xcc, 1f + and %g1, TSTATE_CWP, %g1 retry +1: mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_clr_l6 -window_scheisse_from_user_common: - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] - wrpr %g1, %cwp - ba,pt %xcc, etrap - rd %pc, %g7 - call do_sparc64_fault - add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 - - .globl winfix_mna, fill_fixup_mna, spill_fixup_mna winfix_mna: - andn %g3, 0x7f, %g3 - add %g3, 0x78, %g3 - wrpr %g3, %tnpc + andn %g3, 0x7f, %g3 + add %g3, 0x78, %g3 + wrpr %g3, %tnpc done -fill_fixup_mna: - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 - be,pt %xcc, window_mna_from_user_common - and %g1, TSTATE_CWP, %g1 - /* Please, see fill_fixup commentary about why we must preserve - * %l5 and %l6 to preserve absolute correct semantics. - */ - rdpr %wstate, %g2 ! Grab user mode wstate. - wrpr %g1, %cwp ! Get into the right window. - sll %g2, 3, %g2 ! NORMAL-->OTHER - wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - - wrpr %g2, 0x0, %wstate ! This must be consistent. - wrpr %g0, 0x0, %otherwin ! We know this. - call set_pcontext ! Change contexts... +fill_fixup_mna: + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + mov %l4, %o2 + call sun4v_do_mna + mov %l5, %o1 + ba,a,pt %xcc, rtrap_clr_l6 +1: mov %l4, %o1 + mov %l5, %o2 + call mem_address_unaligned nop - rdpr %pstate, %l1 ! Prepare to change globals. - mov %g4, %o2 ! Setup args for - mov %g5, %o1 ! final call to mem_address_unaligned. - andn %l1, PSTATE_MM, %l1 ! We want to be in RMO + ba,a,pt %xcc, rtrap_clr_l6 - mov %g6, %o7 ! Stash away current. - wrpr %g0, 0x0, %tl ! Out of trap levels. - wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate - mov %o7, %g6 ! Get current back. - ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif - call mem_address_unaligned - add %sp, PTREGS_OFF, %o0 - - b,pt %xcc, rtrap - nop ! yes, the nop is correct -spill_fixup_mna: - ldx [%g6 + TI_FLAGS], %g1 - andcc %g1, _TIF_32BIT, %g0 - ldub [%g6 + TI_WSAVED], %g1 - sll %g1, 3, %g3 - add %g6, %g3, %g3 - stx %sp, [%g3 + TI_RWIN_SPTRS] - - sll %g1, 7, %g3 - bne,pt %xcc, 1f - add %g6, %g3, %g3 - stx %l0, [%g3 + TI_REG_WINDOW + 0x00] - stx %l1, [%g3 + TI_REG_WINDOW + 0x08] - stx %l2, [%g3 + TI_REG_WINDOW + 0x10] - stx %l3, [%g3 + TI_REG_WINDOW + 0x18] - stx %l4, [%g3 + TI_REG_WINDOW + 0x20] - - stx %l5, [%g3 + TI_REG_WINDOW + 0x28] - stx %l6, [%g3 + TI_REG_WINDOW + 0x30] - stx %l7, [%g3 + TI_REG_WINDOW + 0x38] - stx %i0, [%g3 + TI_REG_WINDOW + 0x40] - stx %i1, [%g3 + TI_REG_WINDOW + 0x48] - stx %i2, [%g3 + TI_REG_WINDOW + 0x50] - stx %i3, [%g3 + TI_REG_WINDOW + 0x58] - stx %i4, [%g3 + TI_REG_WINDOW + 0x60] - - stx %i5, [%g3 + TI_REG_WINDOW + 0x68] - stx %i6, [%g3 + TI_REG_WINDOW + 0x70] - stx %i7, [%g3 + TI_REG_WINDOW + 0x78] - b,pt %xcc, 2f - add %g1, 1, %g1 -1: std %l0, [%g3 + TI_REG_WINDOW + 0x00] - std %l2, [%g3 + TI_REG_WINDOW + 0x08] - std %l4, [%g3 + TI_REG_WINDOW + 0x10] - - std %l6, [%g3 + TI_REG_WINDOW + 0x18] - std %i0, [%g3 + TI_REG_WINDOW + 0x20] - std %i2, [%g3 + TI_REG_WINDOW + 0x28] - std %i4, [%g3 + TI_REG_WINDOW + 0x30] - std %i6, [%g3 + TI_REG_WINDOW + 0x38] - add %g1, 1, %g1 -2: stb %g1, [%g6 + TI_WSAVED] - rdpr %tstate, %g1 - - andcc %g1, TSTATE_PRIV, %g0 - saved - be,pn %xcc, window_mna_from_user_common - and %g1, TSTATE_CWP, %g1 - retry -window_mna_from_user_common: - wrpr %g1, %cwp - sethi %hi(109f), %g7 - ba,pt %xcc, etrap -109: or %g7, %lo(109b), %g7 - mov %l4, %o2 - mov %l5, %o1 - call mem_address_unaligned - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 - - /* These are only needed for 64-bit mode processes which - * put their stack pointer into the VPTE area and there - * happens to be a VPTE tlb entry mapped there during - * a spill/fill trap to that stack frame. - */ - .globl winfix_dax, fill_fixup_dax, spill_fixup_dax winfix_dax: - andn %g3, 0x7f, %g3 - add %g3, 0x74, %g3 - wrpr %g3, %tnpc + andn %g3, 0x7f, %g3 + add %g3, 0x74, %g3 + wrpr %g3, %tnpc done -fill_fixup_dax: - rdpr %tstate, %g1 - andcc %g1, TSTATE_PRIV, %g0 - be,pt %xcc, window_dax_from_user_common - and %g1, TSTATE_CWP, %g1 - - /* Please, see fill_fixup commentary about why we must preserve - * %l5 and %l6 to preserve absolute correct semantics. - */ - rdpr %wstate, %g2 ! Grab user mode wstate. - wrpr %g1, %cwp ! Get into the right window. - sll %g2, 3, %g2 ! NORMAL-->OTHER - wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistent. - wrpr %g0, 0x0, %otherwin ! We know this. - call set_pcontext ! Change contexts... +fill_fixup_dax: + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + sethi %hi(tlb_type), %g1 + mov %l4, %o1 + lduw [%g1 + %lo(tlb_type)], %g1 + mov %l5, %o2 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + call sun4v_data_access_exception nop - rdpr %pstate, %l1 ! Prepare to change globals. - mov %g4, %o1 ! Setup args for - mov %g5, %o2 ! final call to spitfire_data_access_exception. - andn %l1, PSTATE_MM, %l1 ! We want to be in RMO - - mov %g6, %o7 ! Stash away current. - wrpr %g0, 0x0, %tl ! Out of trap levels. - wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate - mov %o7, %g6 ! Get current back. - ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif - call spitfire_data_access_exception - add %sp, PTREGS_OFF, %o0 - - b,pt %xcc, rtrap - nop ! yes, the nop is correct -spill_fixup_dax: - ldx [%g6 + TI_FLAGS], %g1 - andcc %g1, _TIF_32BIT, %g0 - ldub [%g6 + TI_WSAVED], %g1 - sll %g1, 3, %g3 - add %g6, %g3, %g3 - stx %sp, [%g3 + TI_RWIN_SPTRS] - - sll %g1, 7, %g3 - bne,pt %xcc, 1f - add %g6, %g3, %g3 - stx %l0, [%g3 + TI_REG_WINDOW + 0x00] - stx %l1, [%g3 + TI_REG_WINDOW + 0x08] - stx %l2, [%g3 + TI_REG_WINDOW + 0x10] - stx %l3, [%g3 + TI_REG_WINDOW + 0x18] - stx %l4, [%g3 + TI_REG_WINDOW + 0x20] - - stx %l5, [%g3 + TI_REG_WINDOW + 0x28] - stx %l6, [%g3 + TI_REG_WINDOW + 0x30] - stx %l7, [%g3 + TI_REG_WINDOW + 0x38] - stx %i0, [%g3 + TI_REG_WINDOW + 0x40] - stx %i1, [%g3 + TI_REG_WINDOW + 0x48] - stx %i2, [%g3 + TI_REG_WINDOW + 0x50] - stx %i3, [%g3 + TI_REG_WINDOW + 0x58] - stx %i4, [%g3 + TI_REG_WINDOW + 0x60] - - stx %i5, [%g3 + TI_REG_WINDOW + 0x68] - stx %i6, [%g3 + TI_REG_WINDOW + 0x70] - stx %i7, [%g3 + TI_REG_WINDOW + 0x78] - b,pt %xcc, 2f - add %g1, 1, %g1 -1: std %l0, [%g3 + TI_REG_WINDOW + 0x00] - std %l2, [%g3 + TI_REG_WINDOW + 0x08] - std %l4, [%g3 + TI_REG_WINDOW + 0x10] - - std %l6, [%g3 + TI_REG_WINDOW + 0x18] - std %i0, [%g3 + TI_REG_WINDOW + 0x20] - std %i2, [%g3 + TI_REG_WINDOW + 0x28] - std %i4, [%g3 + TI_REG_WINDOW + 0x30] - std %i6, [%g3 + TI_REG_WINDOW + 0x38] - add %g1, 1, %g1 -2: stb %g1, [%g6 + TI_WSAVED] - rdpr %tstate, %g1 - - andcc %g1, TSTATE_PRIV, %g0 - saved - be,pn %xcc, window_dax_from_user_common - and %g1, TSTATE_CWP, %g1 - retry -window_dax_from_user_common: - wrpr %g1, %cwp - sethi %hi(109f), %g7 - ba,pt %xcc, etrap -109: or %g7, %lo(109b), %g7 - mov %l4, %o1 - mov %l5, %o2 - call spitfire_data_access_exception - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 + ba,a,pt %xcc, rtrap_clr_l6 +1: call spitfire_data_access_exception + nop + ba,a,pt %xcc, rtrap_clr_l6 diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index c295806..8812ded 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -11,6 +11,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ VISsave.o atomic.o bitops.o \ U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ + NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ + NGpage.o NGbzero.o \ copy_in_user.o user_fixup.o memmove.o \ mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o diff --git a/arch/sparc64/lib/NGbzero.S b/arch/sparc64/lib/NGbzero.S new file mode 100644 index 0000000..e86baec --- /dev/null +++ b/arch/sparc64/lib/NGbzero.S @@ -0,0 +1,163 @@ +/* NGbzero.S: Niagara optimized memset/clear_user. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ +#include <asm/asi.h> + +#define EX_ST(x,y) \ +98: x,y; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov %o1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + + .text + + .globl NGmemset + .type NGmemset, #function +NGmemset: /* %o0=buf, %o1=pat, %o2=len */ + and %o1, 0xff, %o3 + mov %o2, %o1 + sllx %o3, 8, %g1 + or %g1, %o3, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %xcc, 1f + or %g1, %o2, %o2 + + .globl NGbzero + .type NGbzero, #function +NGbzero: + clr %o2 +1: brz,pn %o1, NGbzero_return + mov %o0, %o3 + + /* %o5: saved %asi, restored at NGbzero_done + * %g7: store-init %asi to use + * %o4: non-store-init %asi to use + */ + rd %asi, %o5 + mov ASI_BLK_INIT_QUAD_LDD_P, %g7 + mov ASI_P, %o4 + wr %o4, 0x0, %asi + +NGbzero_from_clear_user: + cmp %o1, 15 + bl,pn %icc, NGbzero_tiny + andcc %o0, 0x7, %g1 + be,pt %xcc, 2f + mov 8, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: EX_ST(stba %o2, [%o0 + 0x00] %asi) + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %o0, 1, %o0 +2: cmp %o1, 128 + bl,pn %icc, NGbzero_medium + andcc %o0, (64 - 1), %g1 + be,pt %xcc, NGbzero_pre_loop + mov 64, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %xcc, 1b + add %o0, 8, %o0 + +NGbzero_pre_loop: + wr %g7, 0x0, %asi + andn %o1, (64 - 1), %g1 + sub %o1, %g1, %o1 +NGbzero_loop: + EX_ST(stxa %o2, [%o0 + 0x00] %asi) + EX_ST(stxa %o2, [%o0 + 0x08] %asi) + EX_ST(stxa %o2, [%o0 + 0x10] %asi) + EX_ST(stxa %o2, [%o0 + 0x18] %asi) + EX_ST(stxa %o2, [%o0 + 0x20] %asi) + EX_ST(stxa %o2, [%o0 + 0x28] %asi) + EX_ST(stxa %o2, [%o0 + 0x30] %asi) + EX_ST(stxa %o2, [%o0 + 0x38] %asi) + subcc %g1, 64, %g1 + bne,pt %xcc, NGbzero_loop + add %o0, 64, %o0 + + wr %o4, 0x0, %asi + brz,pn %o1, NGbzero_done +NGbzero_medium: + andncc %o1, 0x7, %g1 + be,pn %xcc, 2f + sub %o1, %g1, %o1 +1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) + subcc %g1, 8, %g1 + bne,pt %xcc, 1b + add %o0, 8, %o0 +2: brz,pt %o1, NGbzero_done + nop + +NGbzero_tiny: +1: EX_ST(stba %o2, [%o0 + 0x00] %asi) + subcc %o1, 1, %o1 + bne,pt %icc, 1b + add %o0, 1, %o0 + + /* fallthrough */ + +NGbzero_done: + wr %o5, 0x0, %asi + +NGbzero_return: + retl + mov %o3, %o0 + .size NGbzero, .-NGbzero + .size NGmemset, .-NGmemset + + .globl NGclear_user + .type NGclear_user, #function +NGclear_user: /* %o0=buf, %o1=len */ + rd %asi, %o5 + brz,pn %o1, NGbzero_done + clr %o3 + cmp %o5, ASI_AIUS + bne,pn %icc, NGbzero + clr %o2 + mov ASI_BLK_INIT_QUAD_LDD_AIUS, %g7 + ba,pt %xcc, NGbzero_from_clear_user + mov ASI_AIUS, %o4 + .size NGclear_user, .-NGclear_user + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara_patch_bzero + .type niagara_patch_bzero,#function +niagara_patch_bzero: + NG_DO_PATCH(memset, NGmemset) + NG_DO_PATCH(__bzero, NGbzero) + NG_DO_PATCH(__clear_user, NGclear_user) + NG_DO_PATCH(tsb_init, NGtsb_init) + retl + nop + .size niagara_patch_bzero,.-niagara_patch_bzero diff --git a/arch/sparc64/lib/NGcopy_from_user.S b/arch/sparc64/lib/NGcopy_from_user.S new file mode 100644 index 0000000..2d93456 --- /dev/null +++ b/arch/sparc64/lib/NGcopy_from_user.S @@ -0,0 +1,37 @@ +/* NGcopy_from_user.S: Niagara optimized copy from userspace. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME NGcopy_from_user +#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest +#define LOAD_TWIN(addr_reg,dest0,dest1) \ + ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NGmemcpy.S" diff --git a/arch/sparc64/lib/NGcopy_to_user.S b/arch/sparc64/lib/NGcopy_to_user.S new file mode 100644 index 0000000..34112d5 --- /dev/null +++ b/arch/sparc64/lib/NGcopy_to_user.S @@ -0,0 +1,40 @@ +/* NGcopy_to_user.S: Niagara optimized copy to userspace. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME NGcopy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NGmemcpy.S" diff --git a/arch/sparc64/lib/NGmemcpy.S b/arch/sparc64/lib/NGmemcpy.S new file mode 100644 index 0000000..8e522b3 --- /dev/null +++ b/arch/sparc64/lib/NGmemcpy.S @@ -0,0 +1,368 @@ +/* NGmemcpy.S: Niagara optimized memcpy. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include <asm/asi.h> +#include <asm/thread_info.h> +#define GLOBAL_SPARE %g7 +#define RESTORE_ASI(TMP) \ + ldub [%g6 + TI_CURRENT_DS], TMP; \ + wr TMP, 0x0, %asi; +#else +#define GLOBAL_SPARE %g5 +#define RESTORE_ASI(TMP) \ + wr %g0, ASI_PNF, %asi +#endif + +#ifndef STORE_ASI +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#ifndef MEMCPY_DEBUG +#define LOAD(type,addr,dest) type [addr], dest +#else +#define LOAD(type,addr,dest) type##a [addr] 0x80, dest +#endif +#endif + +#ifndef LOAD_TWIN +#define LOAD_TWIN(addr_reg,dest0,dest1) \ + ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 +#endif + +#ifndef STORE +#define STORE(type,src,addr) type src, [addr] +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] %asi +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME NGmemcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, GLOBAL_SPARE + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 2 blocks (128 bytes) is the minimum we can do the block + * copy with. We need to ensure that we'll iterate at least + * once in the block copy loop. At worst we'll need to align + * the destination to a 64-byte boundary which can chew up + * to (64 - 1) bytes from the length before we perform the + * block copy loop. + */ + cmp %o2, (2 * 64) + blu,pt %XCC, 70f + andcc %o3, 0x7, %g0 + + /* %o0: dst + * %o1: src + * %o2: len (known to be >= 128) + * + * The block copy loops will use %o4/%o5,%g2/%g3 as + * temporaries while copying the data. + */ + + LOAD(prefetch, %o1, #one_read) + wr %g0, STORE_ASI, %asi + + /* Align destination on 64-byte boundary. */ + andcc %o0, (64 - 1), %o4 + be,pt %XCC, 2f + sub %o4, 64, %o4 + sub %g0, %o4, %o4 ! bytes to align dst + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o0)) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + + /* If the source is on a 16-byte boundary we can do + * the direct block copy loop. If it is 8-byte aligned + * we can do the 16-byte loads offset by -8 bytes and the + * init stores offset by one register. + * + * If the source is not even 8-byte aligned, we need to do + * shifting and masking (basically integer faligndata). + * + * The careful bit with init stores is that if we store + * to any part of the cache line we have to store the whole + * cacheline else we can end up with corrupt L2 cache line + * contents. Since the loop works on 64-bytes of 64-byte + * aligned store data at a time, this is easy to ensure. + */ +2: + andcc %o1, (16 - 1), %o4 + andn %o2, (64 - 1), %g1 ! block copy loop iterator + sub %o2, %g1, %o2 ! final sub-block copy bytes + be,pt %XCC, 50f + cmp %o4, 8 + be,a,pt %XCC, 10f + sub %o1, 0x8, %o1 + + /* Neither 8-byte nor 16-byte aligned, shift and mask. */ + mov %g1, %o4 + and %o1, 0x7, %g1 + sll %g1, 3, %g1 + mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + sllx %g2, %g1, %g2 + +#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ + EX_LD(LOAD(ldx, SRC, TMP1)); \ + srlx TMP1, PRE_SHIFT, TMP2; \ + or TMP2, PRE_VAL, TMP2; \ + EX_ST(STORE_INIT(TMP2, DST)); \ + sllx TMP1, POST_SHIFT, PRE_VAL; + +1: add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) + add %o1, 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32 - 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) + subcc %o4, 64, %o4 + bne,pt %XCC, 1b + add %o0, 64, %o0 + +#undef SWIVEL_ONE_DWORD + + srl %g1, 3, %g1 + ba,pt %XCC, 60f + add %o1, %g1, %o1 + +10: /* Destination is 64-byte aligned, source was only 8-byte + * aligned but it has been subtracted by 8 and we perform + * one twin load ahead, then add 8 back into source when + * we finish the loop. + */ + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) +1: add %o1, 16, %o1 + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line + EX_ST(STORE_INIT(%g2, %o0 + 0x08)) + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%g3, %o0 + 0x10)) + EX_ST(STORE_INIT(%o4, %o0 + 0x18)) + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%o5, %o0 + 0x20)) + EX_ST(STORE_INIT(%g2, %o0 + 0x28)) + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + EX_ST(STORE_INIT(%g3, %o0 + 0x30)) + EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + + ba,pt %XCC, 60f + add %o1, 0x8, %o1 + +50: /* Destination is 64-byte aligned, and source is 16-byte + * aligned. + */ +1: EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + add %o1, 16, %o1 + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08)) + EX_LD(LOAD_TWIN(%o1, %o4, %o5)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%g2, %o0 + 0x10)) + EX_ST(STORE_INIT(%g3, %o0 + 0x18)) + EX_LD(LOAD_TWIN(%o1, %g2, %g3)) + add %o1, 16, %o1 + EX_ST(STORE_INIT(%o4, %o0 + 0x20)) + EX_ST(STORE_INIT(%o5, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x30)) + EX_ST(STORE_INIT(%g3, %o0 + 0x38)) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + /* fall through */ + +60: + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. + */ + RESTORE_ASI(%o3) + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +70: /* 16 < len <= 64 */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + EX_LD(LOAD(ldx, %o1, %o5)) + add %o1, 0x08, %o1 + EX_LD(LOAD(ldx, %o1, %g1)) + sub %o1, 0x08, %o1 + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 + EX_ST(STORE(stx, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + EX_LD(LOAD(ldx, %o1, %o5)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + EX_LD(LOAD(lduw, %o1, %o5)) + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + EX_LD(LOAD(ldub, %o1, %o5)) + EX_ST(STORE(stb, %o5, %o1 + %o3)) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + EX_LD(LOAD(ldx, %o1, %g3)) + subcc %o4, 0x8, %o4 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + EX_ST(STORE(stx, %o5, %o0)) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + EX_LD(LOAD(lduw, %o1, %g1)) + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S new file mode 100644 index 0000000..7d7c3bb --- /dev/null +++ b/arch/sparc64/lib/NGpage.S @@ -0,0 +1,96 @@ +/* NGpage.S: Niagara optimize clear and copy page. + * + * Copyright (C) 2006 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + + .text + .align 32 + + /* This is heavily simplified from the sun4u variants + * because Niagara does not have any D-cache aliasing issues + * and also we don't need to use the FPU in order to implement + * an optimal page copy/clear. + */ + +NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x00], #one_read + mov 8, %g1 + mov 16, %g2 + mov 24, %g3 + set PAGE_SIZE, %g7 + +1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 + ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 + prefetch [%o1 + 0x40], #one_read + add %o1, 32, %o1 + stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 + add %o1, 32, %o1 + add %o0, 32, %o0 + stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + subcc %g7, 64, %g7 + bne,pt %xcc, 1b + add %o0, 32, %o0 + retl + nop + +NGclear_page: /* %o0=dest */ +NGclear_user_page: /* %o0=dest, %o1=vaddr */ + mov 8, %g1 + mov 16, %g2 + mov 24, %g3 + set PAGE_SIZE, %g7 + +1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 32, %o0 + stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + subcc %g7, 64, %g7 + bne,pt %xcc, 1b + add %o0, 32, %o0 + retl + nop + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara_patch_pageops + .type niagara_patch_pageops,#function +niagara_patch_pageops: + NG_DO_PATCH(copy_user_page, NGcopy_user_page) + NG_DO_PATCH(_clear_page, NGclear_page) + NG_DO_PATCH(clear_user_page, NGclear_user_page) + retl + nop + .size niagara_patch_pageops,.-niagara_patch_pageops diff --git a/arch/sparc64/lib/NGpatch.S b/arch/sparc64/lib/NGpatch.S new file mode 100644 index 0000000..3b0674f --- /dev/null +++ b/arch/sparc64/lib/NGpatch.S @@ -0,0 +1,33 @@ +/* NGpatch.S: Patch Ultra-I routines with Niagara variant. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara_patch_copyops + .type niagara_patch_copyops,#function +niagara_patch_copyops: + NG_DO_PATCH(memcpy, NGmemcpy) + NG_DO_PATCH(___copy_from_user, NGcopy_from_user) + NG_DO_PATCH(___copy_to_user, NGcopy_to_user) + retl + nop + .size niagara_patch_copyops,.-niagara_patch_copyops diff --git a/arch/sparc64/lib/U3patch.S b/arch/sparc64/lib/U3patch.S index e2b6c5e..ecc3026 100644 --- a/arch/sparc64/lib/U3patch.S +++ b/arch/sparc64/lib/U3patch.S @@ -12,7 +12,8 @@ or %g2, %lo(OLD), %g2; \ sub %g1, %g2, %g1; \ sethi %hi(BRANCH_ALWAYS), %g3; \ - srl %g1, 2, %g1; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ or %g3, %lo(BRANCH_ALWAYS), %g3; \ or %g3, %g1, %g3; \ stw %g3, [%g2]; \ diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S index 1d2abcf..c7bbae8 100644 --- a/arch/sparc64/lib/bzero.S +++ b/arch/sparc64/lib/bzero.S @@ -98,12 +98,12 @@ __bzero_done: .text; \ .align 4; - .globl __bzero_noasi - .type __bzero_noasi, #function -__bzero_noasi: /* %o0=buf, %o1=len */ - brz,pn %o1, __bzero_noasi_done + .globl __clear_user + .type __clear_user, #function +__clear_user: /* %o0=buf, %o1=len */ + brz,pn %o1, __clear_user_done cmp %o1, 16 - bl,pn %icc, __bzero_noasi_tiny + bl,pn %icc, __clear_user_tiny EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) andcc %o0, 0x3, %g0 be,pt %icc, 2f @@ -145,14 +145,14 @@ __bzero_noasi: /* %o0=buf, %o1=len */ subcc %g1, 8, %g1 bne,pt %icc, 5b add %o0, 0x8, %o0 -6: brz,pt %o1, __bzero_noasi_done +6: brz,pt %o1, __clear_user_done nop -__bzero_noasi_tiny: +__clear_user_tiny: 1: EX_ST(stba %g0, [%o0 + 0x00] %asi) subcc %o1, 1, %o1 bne,pt %icc, 1b add %o0, 1, %o0 -__bzero_noasi_done: +__clear_user_done: retl clr %o0 - .size __bzero_noasi, .-__bzero_noasi + .size __clear_user, .-__clear_user diff --git a/arch/sparc64/lib/clear_page.S b/arch/sparc64/lib/clear_page.S index b59884e..77e531f 100644 --- a/arch/sparc64/lib/clear_page.S +++ b/arch/sparc64/lib/clear_page.S @@ -9,6 +9,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/spitfire.h> +#include <asm/head.h> /* What we used to do was lock a TLB entry into a specific * TLB slot, clear the page with interrupts disabled, then @@ -22,9 +23,6 @@ * disable preemption during the clear. */ -#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) -#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) - .text .globl _clear_page @@ -43,12 +41,11 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ sethi %hi(PAGE_SIZE), %o4 sllx %g2, 32, %g2 - sethi %uhi(TTE_BITS_TOP), %g3 + sethi %hi(PAGE_KERNEL_LOCKED), %g3 - sllx %g3, 32, %g3 + ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 sub %o0, %g2, %g1 ! paddr - or %g3, TTE_BITS_BOTTOM, %g3 and %o1, %o4, %o0 ! vaddr D-cache alias bit or %g1, %g3, %g1 ! TTE data @@ -66,7 +63,8 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ wrpr %o4, PSTATE_IE, %pstate stxa %o0, [%g3] ASI_DMMU stxa %g1, [%g0] ASI_DTLB_DATA_IN - flush %g6 + sethi %hi(KERNBASE), %g1 + flush %g1 wrpr %o4, 0x0, %pstate mov 1, %o4 diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S index feebb14..3746066 100644 --- a/arch/sparc64/lib/copy_page.S +++ b/arch/sparc64/lib/copy_page.S @@ -23,8 +23,6 @@ * disable preemption during the clear. */ -#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) -#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) #define DCACHE_SIZE (PAGE_SIZE * 2) #if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) @@ -52,13 +50,12 @@ copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ sethi %hi(PAGE_SIZE), %o3 sllx %g2, 32, %g2 - sethi %uhi(TTE_BITS_TOP), %g3 + sethi %hi(PAGE_KERNEL_LOCKED), %g3 - sllx %g3, 32, %g3 + ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 sub %o0, %g2, %g1 ! dest paddr sub %o1, %g2, %g2 ! src paddr - or %g3, TTE_BITS_BOTTOM, %g3 and %o2, %o3, %o0 ! vaddr D-cache alias bit or %g1, %g3, %g1 ! dest TTE data diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c index e880872..fb27e54 100644 --- a/arch/sparc64/lib/delay.c +++ b/arch/sparc64/lib/delay.c @@ -1,6 +1,6 @@ /* delay.c: Delay loops for sparc64 * - * Copyright (C) 2004 David S. Miller <davem@redhat.com> + * Copyright (C) 2004, 2006 David S. Miller <davem@davemloft.net> * * Based heavily upon x86 variant which is: * Copyright (C) 1993 Linus Torvalds @@ -8,19 +8,16 @@ */ #include <linux/delay.h> +#include <asm/timer.h> void __delay(unsigned long loops) { - __asm__ __volatile__( -" b,pt %%xcc, 1f\n" -" cmp %0, 0\n" -" .align 32\n" -"1:\n" -" bne,pt %%xcc, 1b\n" -" subcc %0, 1, %0\n" - : "=&r" (loops) - : "0" (loops) - : "cc"); + unsigned long bclock, now; + + bclock = tick_ops->get_tick(); + do { + now = tick_ops->get_tick(); + } while ((now-bclock) < loops); } /* We used to multiply by HZ after shifting down by 32 bits diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S index 4cd5d2b..a79c888 100644 --- a/arch/sparc64/lib/xor.S +++ b/arch/sparc64/lib/xor.S @@ -2,9 +2,10 @@ * arch/sparc64/lib/xor.S * * High speed xor_block operation for RAID4/5 utilizing the - * UltraSparc Visual Instruction Set. + * UltraSparc Visual Instruction Set and Niagara store-init/twin-load. * * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> */ #include <asm/visasm.h> @@ -19,6 +20,8 @@ */ .text .align 32 + + /* VIS versions. */ .globl xor_vis_2 .type xor_vis_2,#function xor_vis_2: @@ -352,3 +355,298 @@ xor_vis_5: ret restore .size xor_vis_5, .-xor_vis_5 + + /* Niagara versions. */ + .globl xor_niagara_2 + .type xor_niagara_2,#function +xor_niagara_2: /* %o0=bytes, %o1=dest, %o2=src */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src + 0x00 */ + ldda [%i1 + 0x10] %asi, %i4 /* %i4/%i5 = src + 0x10 */ + ldda [%i1 + 0x20] %asi, %g2 /* %g2/%g3 = src + 0x20 */ + ldda [%i1 + 0x30] %asi, %l0 /* %l0/%l1 = src + 0x30 */ + prefetch [%i1 + 0x40], #one_read + ldda [%i0 + 0x00] %asi, %o0 /* %o0/%o1 = dest + 0x00 */ + ldda [%i0 + 0x10] %asi, %o2 /* %o2/%o3 = dest + 0x10 */ + ldda [%i0 + 0x20] %asi, %o4 /* %o4/%o5 = dest + 0x20 */ + ldda [%i0 + 0x30] %asi, %l2 /* %l2/%l3 = dest + 0x30 */ + prefetch [%i0 + 0x40], #n_writes + xor %o0, %i2, %o0 + xor %o1, %i3, %o1 + stxa %o0, [%i0 + 0x00] %asi + stxa %o1, [%i0 + 0x08] %asi + xor %o2, %i4, %o2 + xor %o3, %i5, %o3 + stxa %o2, [%i0 + 0x10] %asi + stxa %o3, [%i0 + 0x18] %asi + xor %o4, %g2, %o4 + xor %o5, %g3, %o5 + stxa %o4, [%i0 + 0x20] %asi + stxa %o5, [%i0 + 0x28] %asi + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x30] %asi + stxa %l3, [%i0 + 0x38] %asi + add %i0, 0x40, %i0 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %i1, 0x40, %i1 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_2, .-xor_niagara_2 + + .globl xor_niagara_3 + .type xor_niagara_3,#function +xor_niagara_3: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + prefetch [%i3], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 + mov %i3, %l7 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ + ldda [%i1 + 0x10] %asi, %i4 /* %i4/%i5 = src1 + 0x10 */ + ldda [%l7 + 0x00] %asi, %g2 /* %g2/%g3 = src2 + 0x00 */ + ldda [%l7 + 0x10] %asi, %l0 /* %l0/%l1 = src2 + 0x10 */ + ldda [%i0 + 0x00] %asi, %o0 /* %o0/%o1 = dest + 0x00 */ + ldda [%i0 + 0x10] %asi, %o2 /* %o2/%o3 = dest + 0x10 */ + xor %g2, %i2, %g2 + xor %g3, %i3, %g3 + xor %o0, %g2, %o0 + xor %o1, %g3, %o1 + stxa %o0, [%i0 + 0x00] %asi + stxa %o1, [%i0 + 0x08] %asi + ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ + ldda [%l7 + 0x20] %asi, %g2 /* %g2/%g3 = src2 + 0x20 */ + ldda [%i0 + 0x20] %asi, %o0 /* %o0/%o1 = dest + 0x20 */ + xor %l0, %i4, %l0 + xor %l1, %i5, %l1 + xor %o2, %l0, %o2 + xor %o3, %l1, %o3 + stxa %o2, [%i0 + 0x10] %asi + stxa %o3, [%i0 + 0x18] %asi + ldda [%i1 + 0x30] %asi, %i4 /* %i4/%i5 = src1 + 0x30 */ + ldda [%l7 + 0x30] %asi, %l0 /* %l0/%l1 = src2 + 0x30 */ + ldda [%i0 + 0x30] %asi, %o2 /* %o2/%o3 = dest + 0x30 */ + prefetch [%i1 + 0x40], #one_read + prefetch [%l7 + 0x40], #one_read + prefetch [%i0 + 0x40], #n_writes + xor %g2, %i2, %g2 + xor %g3, %i3, %g3 + xor %o0, %g2, %o0 + xor %o1, %g3, %o1 + stxa %o0, [%i0 + 0x20] %asi + stxa %o1, [%i0 + 0x28] %asi + xor %l0, %i4, %l0 + xor %l1, %i5, %l1 + xor %o2, %l0, %o2 + xor %o3, %l1, %o3 + stxa %o2, [%i0 + 0x30] %asi + stxa %o3, [%i0 + 0x38] %asi + add %i0, 0x40, %i0 + add %i1, 0x40, %i1 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %l7, 0x40, %l7 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_3, .-xor_niagara_3 + + .globl xor_niagara_4 + .type xor_niagara_4,#function +xor_niagara_4: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + prefetch [%i3], #one_read + prefetch [%i4], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 + mov %i3, %l7 + mov %i4, %l6 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ + ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */ + ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */ + ldda [%i0 + 0x00] %asi, %l0 /* %l0/%l1 = dest + 0x00 */ + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%i7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x00] %asi + stxa %l1, [%i0 + 0x08] %asi + ldda [%i6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */ + ldda [%i0 + 0x10] %asi, %l0 /* %l0/%l1 = dest + 0x10 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%i7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x10] %asi + stxa %l1, [%i0 + 0x18] %asi + ldda [%i6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */ + ldda [%i0 + 0x20] %asi, %l0 /* %l0/%l1 = dest + 0x20 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%i7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x20] %asi + stxa %l1, [%i0 + 0x28] %asi + ldda [%i6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */ + ldda [%i0 + 0x30] %asi, %l0 /* %l0/%l1 = dest + 0x30 */ + + prefetch [%i1 + 0x40], #one_read + prefetch [%l7 + 0x40], #one_read + prefetch [%l6 + 0x40], #one_read + prefetch [%i0 + 0x40], #n_writes + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + stxa %l0, [%i0 + 0x30] %asi + stxa %l1, [%i0 + 0x38] %asi + + add %i0, 0x40, %i0 + add %i1, 0x40, %i1 + add %l7, 0x40, %l7 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %l6, 0x40, %l6 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_4, .-xor_niagara_4 + + .globl xor_niagara_5 + .type xor_niagara_5,#function +xor_niagara_5: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ + save %sp, -192, %sp + prefetch [%i1], #n_writes + prefetch [%i2], #one_read + prefetch [%i3], #one_read + prefetch [%i4], #one_read + prefetch [%i5], #one_read + rd %asi, %g7 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + srlx %i0, 6, %g1 + mov %i1, %i0 + mov %i2, %i1 + mov %i3, %l7 + mov %i4, %l6 + mov %i5, %l5 +1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ + ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */ + ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */ + ldda [%l5 + 0x00] %asi, %l0 /* %l0/%l1 = src4 + 0x00 */ + ldda [%i0 + 0x00] %asi, %l2 /* %l2/%l3 = dest + 0x00 */ + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + ldda [%l6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */ + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x00] %asi + stxa %l3, [%i0 + 0x08] %asi + ldda [%l5 + 0x10] %asi, %l0 /* %l0/%l1 = src4 + 0x10 */ + ldda [%i0 + 0x10] %asi, %l2 /* %l2/%l3 = dest + 0x10 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + ldda [%l6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */ + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x10] %asi + stxa %l3, [%i0 + 0x18] %asi + ldda [%l5 + 0x20] %asi, %l0 /* %l0/%l1 = src4 + 0x20 */ + ldda [%i0 + 0x20] %asi, %l2 /* %l2/%l3 = dest + 0x20 */ + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */ + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + ldda [%l7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */ + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + ldda [%l6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */ + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x20] %asi + stxa %l3, [%i0 + 0x28] %asi + ldda [%l5 + 0x30] %asi, %l0 /* %l0/%l1 = src4 + 0x30 */ + ldda [%i0 + 0x30] %asi, %l2 /* %l2/%l3 = dest + 0x30 */ + + prefetch [%i1 + 0x40], #one_read + prefetch [%l7 + 0x40], #one_read + prefetch [%l6 + 0x40], #one_read + prefetch [%l5 + 0x40], #one_read + prefetch [%i0 + 0x40], #n_writes + + xor %i4, %i2, %i4 + xor %i5, %i3, %i5 + xor %g2, %i4, %g2 + xor %g3, %i5, %g3 + xor %l0, %g2, %l0 + xor %l1, %g3, %l1 + xor %l2, %l0, %l2 + xor %l3, %l1, %l3 + stxa %l2, [%i0 + 0x30] %asi + stxa %l3, [%i0 + 0x38] %asi + + add %i0, 0x40, %i0 + add %i1, 0x40, %i1 + add %l7, 0x40, %l7 + add %l6, 0x40, %l6 + subcc %g1, 1, %g1 + bne,pt %xcc, 1b + add %l5, 0x40, %l5 + membar #Sync + wr %g7, 0x0, %asi + ret + restore + .size xor_niagara_5, .-xor_niagara_5 diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c index 2ae05cd..6ee496c 100644 --- a/arch/sparc64/math-emu/math.c +++ b/arch/sparc64/math-emu/math.c @@ -206,9 +206,29 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f) case FSTOQ: TYPE(3,3,1,1,1,0,0); break; case FDTOQ: TYPE(3,3,1,2,1,0,0); break; case FQTOI: TYPE(3,1,0,3,1,0,0); break; + + /* We can get either unimplemented or unfinished + * for these cases. Pre-Niagara systems generate + * unfinished fpop for SUBNORMAL cases, and Niagara + * always gives unimplemented fpop for fsqrt{s,d}. + */ + case FSQRTS: { + unsigned long x = current_thread_info()->xfsr[0]; + + x = (x >> 14) & 0xf; + TYPE(x,1,1,1,1,0,0); + break; + } + + case FSQRTD: { + unsigned long x = current_thread_info()->xfsr[0]; + + x = (x >> 14) & 0xf; + TYPE(x,2,1,2,1,0,0); + break; + } + /* SUBNORMAL - ftt == 2 */ - case FSQRTS: TYPE(2,1,1,1,1,0,0); break; - case FSQRTD: TYPE(2,2,1,2,1,0,0); break; case FADDD: case FSUBD: case FMULD: diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile index 9d0960e..e415bf9 100644 --- a/arch/sparc64/mm/Makefile +++ b/arch/sparc64/mm/Makefile @@ -5,6 +5,6 @@ EXTRA_AFLAGS := -ansi EXTRA_CFLAGS := -Werror -obj-y := ultra.o tlb.o fault.o init.o generic.o +obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 6f0539a..63b6cc0 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -29,6 +29,7 @@ #include <asm/lsu.h> #include <asm/sections.h> #include <asm/kdebug.h> +#include <asm/mmu_context.h> /* * To debug kernel to catch accesses to certain virtual/physical addresses. @@ -91,12 +92,13 @@ static void __kprobes unhandled_fault(unsigned long address, die_if_kernel("Oops", regs); } -static void bad_kernel_pc(struct pt_regs *regs) +static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) { unsigned long *ksp; printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", regs->tpc); + printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); __asm__("mov %%sp, %0" : "=r" (ksp)); show_stack(current, ksp); unhandled_fault(regs->tpc, current, regs); @@ -137,7 +139,7 @@ static unsigned int get_user_insn(unsigned long tpc) if (!pte_present(pte)) goto out; - pa = (pte_val(pte) & _PAGE_PADDR); + pa = (pte_pfn(pte) << PAGE_SHIFT); pa += (tpc & ~PAGE_MASK); /* Use phys bypass so we don't pollute dtlb/dcache. */ @@ -257,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) struct vm_area_struct *vma; unsigned int insn = 0; int si_code, fault_code; - unsigned long address; + unsigned long address, mm_rss; fault_code = get_thread_fault_code(); @@ -280,7 +282,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) (tpc >= MODULES_VADDR && tpc < MODULES_END)) { /* Valid, no problems... */ } else { - bad_kernel_pc(regs); + bad_kernel_pc(regs, address); return; } } @@ -406,6 +408,11 @@ good_area: } up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); + if (unlikely(mm_rss >= mm->context.tsb_rss_limit)) + tsb_grow(mm, mm_rss); + return; /* diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index 580b63d..5fc5c57 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -15,15 +15,6 @@ #include <asm/page.h> #include <asm/tlbflush.h> -static inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space) -{ - pte_t pte; - pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E) & - ~(unsigned long)_PAGE_CACHE); - pte_val(pte) |= (((unsigned long)space) << 32); - return pte; -} - /* Remap IO memory, the same way as remap_pfn_range(), but use * the obio memory space. * @@ -48,24 +39,29 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, pte_t entry; unsigned long curend = address + PAGE_SIZE; - entry = mk_pte_io(offset, prot, space); + entry = mk_pte_io(offset, prot, space, PAGE_SIZE); if (!(address & 0xffff)) { - if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ4MB), - space); + if (PAGE_SIZE < (4 * 1024 * 1024) && + !(address & 0x3fffff) && + !(offset & 0x3ffffe) && + end >= address + 0x400000) { + entry = mk_pte_io(offset, prot, space, + 4 * 1024 * 1024); curend = address + 0x400000; offset += 0x400000; - } else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ512K), - space); + } else if (PAGE_SIZE < (512 * 1024) && + !(address & 0x7ffff) && + !(offset & 0x7fffe) && + end >= address + 0x80000) { + entry = mk_pte_io(offset, prot, space, + 512 * 1024 * 1024); curend = address + 0x80000; offset += 0x80000; - } else if (!(offset & 0xfffe) && end >= address + 0x10000) { - entry = mk_pte_io(offset, - __pgprot(pgprot_val (prot) | _PAGE_SZ64K), - space); + } else if (PAGE_SIZE < (64 * 1024) && + !(offset & 0xfffe) && + end >= address + 0x10000) { + entry = mk_pte_io(offset, prot, space, + 64 * 1024); curend = address + 0x10000; offset += 0x10000; } else diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 625cbb3..a7a2486 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -1,7 +1,7 @@ /* * SPARC64 Huge TLB page support. * - * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com) + * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net) */ #include <linux/config.h> @@ -22,6 +22,175 @@ #include <asm/cacheflush.h> #include <asm/mmu_context.h> +/* Slightly simplified from the non-hugepage variant because by + * definition we don't have to worry about any page coloring stuff + */ +#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) +#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; + unsigned long task_size = TASK_SIZE; + unsigned long start_addr; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + if (unlikely(len >= VA_EXCLUDE_START)) + return -ENOMEM; + + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + + task_size -= len; + +full_search: + addr = ALIGN(addr, HPAGE_SIZE); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (addr < VA_EXCLUDE_START && + (addr + len) >= VA_EXCLUDE_START) { + addr = VA_EXCLUDE_END; + vma = find_vma(mm, VA_EXCLUDE_END); + } + if (unlikely(task_size < addr)) { + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (likely(!vma || addr + len <= vma->vm_start)) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + } +} + +static unsigned long +hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache & HPAGE_MASK; + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = (mm->mmap_base-len) & HPAGE_MASK; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = (vma->vm_start-len) & HPAGE_MASK; + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long task_size = TASK_SIZE; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > task_size) + return -ENOMEM; + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -48,12 +217,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pmd_t *pmd; pte_t *pte = NULL; + addr &= HPAGE_MASK; + pgd = pgd_offset(mm, addr); - if (pgd) { + if (!pgd_none(*pgd)) { pud = pud_offset(pgd, addr); - if (pud) { + if (!pud_none(*pud)) { pmd = pmd_offset(pud, addr); - if (pmd) + if (!pmd_none(*pmd)) pte = pte_offset_map(pmd, addr); } } diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 1e44ee2..c2b5561 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -6,6 +6,7 @@ */ #include <linux/config.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> @@ -39,9 +40,27 @@ #include <asm/tlb.h> #include <asm/spitfire.h> #include <asm/sections.h> +#include <asm/tsb.h> +#include <asm/hypervisor.h> extern void device_scan(void); +#define MAX_PHYS_ADDRESS (1UL << 42UL) +#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) +#define KPTE_BITMAP_BYTES \ + ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) + +unsigned long kern_linear_pte_xor[2] __read_mostly; + +/* A bitmap, one bit for every 256MB of physical memory. If the bit + * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else + * if set we should use a 256MB page (via kern_linear_pte_xor[1]). + */ +unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; + +/* A special kernel TSB for 4MB and 256MB linear mappings. */ +struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; + #define MAX_BANKS 32 static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; @@ -111,11 +130,9 @@ static void __init read_obp_memory(const char *property, unsigned long *sparc64_valid_addr_bitmap __read_mostly; -/* Ugly, but necessary... -DaveM */ -unsigned long phys_base __read_mostly; +/* Kernel physical address base and size in bytes. */ unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; -unsigned long pfn_base __read_mostly; /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); @@ -141,24 +158,28 @@ unsigned long sparc64_kern_sec_context __read_mostly; int bigkernel = 0; -/* XXX Tune this... */ -#define PGT_CACHE_LOW 25 -#define PGT_CACHE_HIGH 50 +kmem_cache_t *pgtable_cache __read_mostly; + +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + clear_page(addr); +} + +extern void tsb_cache_init(void); -void check_pgt_cache(void) +void pgtable_cache_init(void) { - preempt_disable(); - if (pgtable_cache_size > PGT_CACHE_HIGH) { - do { - if (pgd_quicklist) - free_pgd_slow(get_pgd_fast()); - if (pte_quicklist[0]) - free_pte_slow(pte_alloc_one_fast(NULL, 0)); - if (pte_quicklist[1]) - free_pte_slow(pte_alloc_one_fast(NULL, 1 << (PAGE_SHIFT + 10))); - } while (pgtable_cache_size > PGT_CACHE_LOW); + pgtable_cache = kmem_cache_create("pgtable_cache", + PAGE_SIZE, PAGE_SIZE, + SLAB_HWCACHE_ALIGN | + SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (!pgtable_cache) { + prom_printf("Could not create pgtable_cache\n"); + prom_halt(); } - preempt_enable(); + tsb_cache_init(); } #ifdef CONFIG_DEBUG_DCFLUSH @@ -168,8 +189,9 @@ atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0); #endif #endif -__inline__ void flush_dcache_page_impl(struct page *page) +inline void flush_dcache_page_impl(struct page *page) { + BUG_ON(tlb_type == hypervisor); #ifdef CONFIG_DEBUG_DCFLUSH atomic_inc(&dcpage_flushes); #endif @@ -186,8 +208,8 @@ __inline__ void flush_dcache_page_impl(struct page *page) } #define PG_dcache_dirty PG_arch_1 -#define PG_dcache_cpu_shift 24 -#define PG_dcache_cpu_mask (256 - 1) +#define PG_dcache_cpu_shift 24UL +#define PG_dcache_cpu_mask (256UL - 1UL) #if NR_CPUS > 256 #error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus @@ -243,32 +265,61 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c : "g1", "g7"); } +static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte) +{ + unsigned long tsb_addr = (unsigned long) ent; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + tsb_addr = __pa(tsb_addr); + + __tsb_insert(tsb_addr, tag, pte); +} + +unsigned long _PAGE_ALL_SZ_BITS __read_mostly; +unsigned long _PAGE_SZBITS __read_mostly; + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { - struct page *page; - unsigned long pfn; - unsigned long pg_flags; - - pfn = pte_pfn(pte); - if (pfn_valid(pfn) && - (page = pfn_to_page(pfn), page_mapping(page)) && - ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { - int cpu = ((pg_flags >> PG_dcache_cpu_shift) & - PG_dcache_cpu_mask); - int this_cpu = get_cpu(); - - /* This is just to optimize away some function calls - * in the SMP case. - */ - if (cpu == this_cpu) - flush_dcache_page_impl(page); - else - smp_flush_dcache_page_impl(page, cpu); + struct mm_struct *mm; + struct tsb *tsb; + unsigned long tag, flags; + + if (tlb_type != hypervisor) { + unsigned long pfn = pte_pfn(pte); + unsigned long pg_flags; + struct page *page; + + if (pfn_valid(pfn) && + (page = pfn_to_page(pfn), page_mapping(page)) && + ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) { + int cpu = ((pg_flags >> PG_dcache_cpu_shift) & + PG_dcache_cpu_mask); + int this_cpu = get_cpu(); + + /* This is just to optimize away some function calls + * in the SMP case. + */ + if (cpu == this_cpu) + flush_dcache_page_impl(page); + else + smp_flush_dcache_page_impl(page, cpu); - clear_dcache_dirty_cpu(page, cpu); + clear_dcache_dirty_cpu(page, cpu); - put_cpu(); + put_cpu(); + } } + + mm = vma->vm_mm; + + spin_lock_irqsave(&mm->context.lock, flags); + + tsb = &mm->context.tsb[(address >> PAGE_SHIFT) & + (mm->context.tsb_nentries - 1UL)]; + tag = (address >> 22UL); + tsb_insert(tsb, tag, pte_val(pte)); + + spin_unlock_irqrestore(&mm->context.lock, flags); } void flush_dcache_page(struct page *page) @@ -276,6 +327,9 @@ void flush_dcache_page(struct page *page) struct address_space *mapping; int this_cpu; + if (tlb_type == hypervisor) + return; + /* Do not bother with the expensive D-cache flush if it * is merely the zero page. The 'bigcore' testcase in GDB * causes this case to run millions of times. @@ -311,7 +365,7 @@ out: void __kprobes flush_icache_range(unsigned long start, unsigned long end) { - /* Cheetah has coherent I-cache. */ + /* Cheetah and Hypervisor platform cpus have coherent I-cache. */ if (tlb_type == spitfire) { unsigned long kaddr; @@ -320,16 +374,6 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end) } } -unsigned long page_to_pfn(struct page *page) -{ - return (unsigned long) ((page - mem_map) + pfn_base); -} - -struct page *pfn_to_page(unsigned long pfn) -{ - return (mem_map + (pfn - pfn_base)); -} - void show_mem(void) { printk("Mem-info:\n"); @@ -338,7 +382,6 @@ void show_mem(void) nr_swap_pages << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", num_physpages); printk("%d free pages\n", nr_free_pages()); - printk("%d pages in page table cache\n",pgtable_cache_size); } void mmu_info(struct seq_file *m) @@ -349,6 +392,8 @@ void mmu_info(struct seq_file *m) seq_printf(m, "MMU Type\t: Cheetah+\n"); else if (tlb_type == spitfire) seq_printf(m, "MMU Type\t: Spitfire\n"); + else if (tlb_type == hypervisor) + seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n"); else seq_printf(m, "MMU Type\t: ???\n"); @@ -371,45 +416,13 @@ struct linux_prom_translation { /* Exported for kernel TLB miss handling in ktlb.S */ struct linux_prom_translation prom_trans[512] __read_mostly; unsigned int prom_trans_ents __read_mostly; -unsigned int swapper_pgd_zero __read_mostly; - -extern unsigned long prom_boot_page; -extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle); -extern int prom_get_mmu_ihandle(void); -extern void register_prom_callbacks(void); /* Exported for SMP bootup purposes. */ unsigned long kern_locked_tte_data; -/* - * Translate PROM's mapping we capture at boot time into physical address. - * The second parameter is only set from prom_callback() invocations. - */ -unsigned long prom_virt_to_phys(unsigned long promva, int *error) -{ - int i; - - for (i = 0; i < prom_trans_ents; i++) { - struct linux_prom_translation *p = &prom_trans[i]; - - if (promva >= p->virt && - promva < (p->virt + p->size)) { - unsigned long base = p->data & _PAGE_PADDR; - - if (error) - *error = 0; - return base + (promva & (8192 - 1)); - } - } - if (error) - *error = 1; - return 0UL; -} - /* The obp translations are saved based on 8k pagesize, since obp can * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> - * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte - * scheme (also, see rant in inherit_locked_prom_mappings()). + * HI_OBP_ADDRESS range are handled in ktlb.S. */ static inline int in_obp_range(unsigned long vaddr) { @@ -490,6 +503,36 @@ static void __init read_obp_translations(void) } } +static void __init hypervisor_tlb_lock(unsigned long vaddr, + unsigned long pte, + unsigned long mmu) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + register unsigned long arg2 asm("%o2"); + register unsigned long arg3 asm("%o3"); + + func = HV_FAST_MMU_MAP_PERM_ADDR; + arg0 = vaddr; + arg1 = 0; + arg2 = pte; + arg3 = mmu; + __asm__ __volatile__("ta 0x80" + : "=&r" (func), "=&r" (arg0), + "=&r" (arg1), "=&r" (arg2), + "=&r" (arg3) + : "0" (func), "1" (arg0), "2" (arg1), + "3" (arg2), "4" (arg3)); + if (arg0 != 0) { + prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: " + "errors with %lx\n", vaddr, 0, pte, mmu, arg0); + prom_halt(); + } +} + +static unsigned long kern_large_tte(unsigned long paddr); + static void __init remap_kernel(void) { unsigned long phys_page, tte_vaddr, tte_data; @@ -497,25 +540,34 @@ static void __init remap_kernel(void) tte_vaddr = (unsigned long) KERNBASE; phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; - tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB | - _PAGE_CP | _PAGE_CV | _PAGE_P | - _PAGE_L | _PAGE_W)); + tte_data = kern_large_tte(phys_page); kern_locked_tte_data = tte_data; - /* Now lock us into the TLBs via OBP. */ - prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); - prom_itlb_load(tlb_ent, tte_data, tte_vaddr); - if (bigkernel) { - tlb_ent -= 1; - prom_dtlb_load(tlb_ent, - tte_data + 0x400000, - tte_vaddr + 0x400000); - prom_itlb_load(tlb_ent, - tte_data + 0x400000, - tte_vaddr + 0x400000); + /* Now lock us into the TLBs via Hypervisor or OBP. */ + if (tlb_type == hypervisor) { + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); + if (bigkernel) { + tte_vaddr += 0x400000; + tte_data += 0x400000; + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); + } + } else { + prom_dtlb_load(tlb_ent, tte_data, tte_vaddr); + prom_itlb_load(tlb_ent, tte_data, tte_vaddr); + if (bigkernel) { + tlb_ent -= 1; + prom_dtlb_load(tlb_ent, + tte_data + 0x400000, + tte_vaddr + 0x400000); + prom_itlb_load(tlb_ent, + tte_data + 0x400000, + tte_vaddr + 0x400000); + } + sparc64_highest_unlocked_tlb_ent = tlb_ent - 1; } - sparc64_highest_unlocked_tlb_ent = tlb_ent - 1; if (tlb_type == cheetah_plus) { sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | CTX_CHEETAH_PLUS_NUC); @@ -533,372 +585,14 @@ static void __init inherit_prom_mappings(void) prom_printf("Remapping the kernel... "); remap_kernel(); prom_printf("done.\n"); - - prom_printf("Registering callbacks... "); - register_prom_callbacks(); - prom_printf("done.\n"); -} - -/* The OBP specifications for sun4u mark 0xfffffffc00000000 and - * upwards as reserved for use by the firmware (I wonder if this - * will be the same on Cheetah...). We use this virtual address - * range for the VPTE table mappings of the nucleus so we need - * to zap them when we enter the PROM. -DaveM - */ -static void __flush_nucleus_vptes(void) -{ - unsigned long prom_reserved_base = 0xfffffffc00000000UL; - int i; - - /* Only DTLB must be checked for VPTE entries. */ - if (tlb_type == spitfire) { - for (i = 0; i < 63; i++) { - unsigned long tag; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no cheetah+ - * page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - tag = spitfire_get_dtlb_tag(i); - if (((tag & ~(PAGE_MASK)) == 0) && - ((tag & (PAGE_MASK)) >= prom_reserved_base)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - for (i = 0; i < 512; i++) { - unsigned long tag = cheetah_get_dtlb_tag(i, 2); - - if ((tag & ~PAGE_MASK) == 0 && - (tag & PAGE_MASK) >= prom_reserved_base) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - cheetah_put_dtlb_data(i, 0x0UL, 2); - } - - if (tlb_type != cheetah_plus) - continue; - - tag = cheetah_get_dtlb_tag(i, 3); - - if ((tag & ~PAGE_MASK) == 0 && - (tag & PAGE_MASK) >= prom_reserved_base) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - cheetah_put_dtlb_data(i, 0x0UL, 3); - } - } - } else { - /* Implement me :-) */ - BUG(); - } } -static int prom_ditlb_set; -struct prom_tlb_entry { - int tlb_ent; - unsigned long tlb_tag; - unsigned long tlb_data; -}; -struct prom_tlb_entry prom_itlb[16], prom_dtlb[16]; - void prom_world(int enter) { - unsigned long pstate; - int i; - if (!enter) set_fs((mm_segment_t) { get_thread_current_ds() }); - if (!prom_ditlb_set) - return; - - /* Make sure the following runs atomically. */ - __asm__ __volatile__("flushw\n\t" - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate" - : "=r" (pstate) - : "i" (PSTATE_IE)); - - if (enter) { - /* Kick out nucleus VPTEs. */ - __flush_nucleus_vptes(); - - /* Install PROM world. */ - for (i = 0; i < 16; i++) { - if (prom_dtlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS), - "i" (ASI_DMMU)); - if (tlb_type == spitfire) - spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - } - if (prom_itlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_itlb[i].tlb_tag), - "r" (TLB_TAG_ACCESS), - "i" (ASI_IMMU)); - if (tlb_type == spitfire) - spitfire_put_itlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - cheetah_put_litlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - } - } - } else { - for (i = 0; i < 16; i++) { - if (prom_dtlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - if (tlb_type == spitfire) - spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, 0x0UL); - else - cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, 0x0UL); - } - if (prom_itlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), - "i" (ASI_IMMU)); - if (tlb_type == spitfire) - spitfire_put_itlb_data(prom_itlb[i].tlb_ent, 0x0UL); - else - cheetah_put_litlb_data(prom_itlb[i].tlb_ent, 0x0UL); - } - } - } - __asm__ __volatile__("wrpr %0, 0, %%pstate" - : : "r" (pstate)); -} - -void inherit_locked_prom_mappings(int save_p) -{ - int i; - int dtlb_seen = 0; - int itlb_seen = 0; - - /* Fucking losing PROM has more mappings in the TLB, but - * it (conveniently) fails to mention any of these in the - * translations property. The only ones that matter are - * the locked PROM tlb entries, so we impose the following - * irrecovable rule on the PROM, it is allowed 8 locked - * entries in the ITLB and 8 in the DTLB. - * - * Supposedly the upper 16GB of the address space is - * reserved for OBP, BUT I WISH THIS WAS DOCUMENTED - * SOMEWHERE!!!!!!!!!!!!!!!!! Furthermore the entire interface - * used between the client program and the firmware on sun5 - * systems to coordinate mmu mappings is also COMPLETELY - * UNDOCUMENTED!!!!!! Thanks S(t)un! - */ - if (save_p) { - for (i = 0; i < 16; i++) { - prom_itlb[i].tlb_ent = -1; - prom_dtlb[i].tlb_ent = -1; - } - } - if (tlb_type == spitfire) { - int high = sparc64_highest_unlocked_tlb_ent; - for (i = 0; i <= high; i++) { - unsigned long data; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no cheetah+ - * page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - data = spitfire_get_dtlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - tag = spitfire_get_dtlb_tag(i); - if (save_p) { - prom_dtlb[dtlb_seen].tlb_ent = i; - prom_dtlb[dtlb_seen].tlb_tag = tag; - prom_dtlb[dtlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - - dtlb_seen++; - if (dtlb_seen > 15) - break; - } - } - - for (i = 0; i < high; i++) { - unsigned long data; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - data = spitfire_get_itlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - tag = spitfire_get_itlb_tag(i); - if (save_p) { - prom_itlb[itlb_seen].tlb_ent = i; - prom_itlb[itlb_seen].tlb_tag = tag; - prom_itlb[itlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - spitfire_put_itlb_data(i, 0x0UL); - - itlb_seen++; - if (itlb_seen > 15) - break; - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - int high = sparc64_highest_unlocked_tlb_ent; - - for (i = 0; i <= high; i++) { - unsigned long data; - - data = cheetah_get_ldtlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - tag = cheetah_get_ldtlb_tag(i); - if (save_p) { - prom_dtlb[dtlb_seen].tlb_ent = i; - prom_dtlb[dtlb_seen].tlb_tag = tag; - prom_dtlb[dtlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - cheetah_put_ldtlb_data(i, 0x0UL); - - dtlb_seen++; - if (dtlb_seen > 15) - break; - } - } - - for (i = 0; i < high; i++) { - unsigned long data; - - data = cheetah_get_litlb_data(i); - if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) { - unsigned long tag; - - tag = cheetah_get_litlb_tag(i); - if (save_p) { - prom_itlb[itlb_seen].tlb_ent = i; - prom_itlb[itlb_seen].tlb_tag = tag; - prom_itlb[itlb_seen].tlb_data = data; - } - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - cheetah_put_litlb_data(i, 0x0UL); - - itlb_seen++; - if (itlb_seen > 15) - break; - } - } - } else { - /* Implement me :-) */ - BUG(); - } - if (save_p) - prom_ditlb_set = 1; -} - -/* Give PROM back his world, done during reboots... */ -void prom_reload_locked(void) -{ - int i; - - for (i = 0; i < 16; i++) { - if (prom_dtlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS), - "i" (ASI_DMMU)); - if (tlb_type == spitfire) - spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - else if (tlb_type == cheetah || tlb_type == cheetah_plus) - cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, - prom_dtlb[i].tlb_data); - } - - if (prom_itlb[i].tlb_ent != -1) { - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "membar #Sync" - : : "r" (prom_itlb[i].tlb_tag), - "r" (TLB_TAG_ACCESS), - "i" (ASI_IMMU)); - if (tlb_type == spitfire) - spitfire_put_itlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - else - cheetah_put_litlb_data(prom_itlb[i].tlb_ent, - prom_itlb[i].tlb_data); - } - } + __asm__ __volatile__("flushw"); } #ifdef DCACHE_ALIASING_POSSIBLE @@ -914,7 +608,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end) if (++n >= 512) break; } - } else { + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { start = __pa(start); end = __pa(end); for (va = start; va < end; va += 32) @@ -927,63 +621,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end) } #endif /* DCACHE_ALIASING_POSSIBLE */ -/* If not locked, zap it. */ -void __flush_tlb_all(void) -{ - unsigned long pstate; - int i; - - __asm__ __volatile__("flushw\n\t" - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate" - : "=r" (pstate) - : "i" (PSTATE_IE)); - if (tlb_type == spitfire) { - for (i = 0; i < 64; i++) { - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - if (!(spitfire_get_dtlb_data(i) & _PAGE_L)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); - spitfire_put_dtlb_data(i, 0x0UL); - } - - /* Spitfire Errata #32 workaround */ - /* NOTE: Always runs on spitfire, so no - * cheetah+ page size encodings. - */ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" - "flush %%g6" - : /* No outputs */ - : "r" (0), - "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - - if (!(spitfire_get_itlb_data(i) & _PAGE_L)) { - __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); - spitfire_put_itlb_data(i, 0x0UL); - } - } - } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { - cheetah_flush_dtlb_all(); - cheetah_flush_itlb_all(); - } - __asm__ __volatile__("wrpr %0, 0, %%pstate" - : : "r" (pstate)); -} - /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. * @@ -991,17 +628,21 @@ void __flush_tlb_all(void) * let the user have CTX 0 (nucleus) or we ever use a CTX * version of zero (and thus NO_CONTEXT would not be caught * by version mis-match tests in mmu_context.h). + * + * Always invoked with interrupts disabled. */ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - + unsigned long flags; + int new_version; - spin_lock(&ctx_alloc_lock); + spin_lock_irqsave(&ctx_alloc_lock, flags); orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); + new_version = 0; if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { @@ -1024,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm) mmu_context_bmap[i + 2] = 0; mmu_context_bmap[i + 3] = 0; } + new_version = 1; goto out; } } @@ -1032,79 +674,10 @@ void get_new_mmu_context(struct mm_struct *mm) out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; - spin_unlock(&ctx_alloc_lock); -} - -#ifndef CONFIG_SMP -struct pgtable_cache_struct pgt_quicklists; -#endif - -/* OK, we have to color these pages. The page tables are accessed - * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S - * code, as well as by PAGE_OFFSET range direct-mapped addresses by - * other parts of the kernel. By coloring, we make sure that the tlbmiss - * fast handlers do not get data from old/garbage dcache lines that - * correspond to an old/stale virtual address (user/kernel) that - * previously mapped the pagetable page while accessing vpte range - * addresses. The idea is that if the vpte color and PAGE_OFFSET range - * color is the same, then when the kernel initializes the pagetable - * using the later address range, accesses with the first address - * range will see the newly initialized data rather than the garbage. - */ -#ifdef DCACHE_ALIASING_POSSIBLE -#define DC_ALIAS_SHIFT 1 -#else -#define DC_ALIAS_SHIFT 0 -#endif -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - struct page *page; - unsigned long color; - - { - pte_t *ptep = pte_alloc_one_fast(mm, address); - - if (ptep) - return ptep; - } + spin_unlock_irqrestore(&ctx_alloc_lock, flags); - color = VPTE_COLOR(address); - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, DC_ALIAS_SHIFT); - if (page) { - unsigned long *to_free; - unsigned long paddr; - pte_t *pte; - -#ifdef DCACHE_ALIASING_POSSIBLE - set_page_count(page, 1); - ClearPageCompound(page); - - set_page_count((page + 1), 1); - ClearPageCompound(page + 1); -#endif - paddr = (unsigned long) page_address(page); - memset((char *)paddr, 0, (PAGE_SIZE << DC_ALIAS_SHIFT)); - - if (!color) { - pte = (pte_t *) paddr; - to_free = (unsigned long *) (paddr + PAGE_SIZE); - } else { - pte = (pte_t *) (paddr + PAGE_SIZE); - to_free = (unsigned long *) paddr; - } - -#ifdef DCACHE_ALIASING_POSSIBLE - /* Now free the other one up, adjust cache size. */ - preempt_disable(); - *to_free = (unsigned long) pte_quicklist[color ^ 0x1]; - pte_quicklist[color ^ 0x1] = to_free; - pgtable_cache_size++; - preempt_enable(); -#endif - - return pte; - } - return NULL; + if (unlikely(new_version)) + smp_new_mmu_context_version(); } void sparc_ultra_dump_itlb(void) @@ -1196,9 +769,78 @@ void sparc_ultra_dump_dtlb(void) extern unsigned long cmdline_memory_size; -unsigned long __init bootmem_init(unsigned long *pages_avail) +/* Find a free area for the bootmem map, avoiding the kernel image + * and the initial ramdisk. + */ +static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned long avoid_start, avoid_end, bootmap_size; + int i; + + bootmap_size = ((end_pfn - start_pfn) + 7) / 8; + bootmap_size = ALIGN(bootmap_size, sizeof(long)); + + avoid_start = avoid_end = 0; +#ifdef CONFIG_BLK_DEV_INITRD + avoid_start = initrd_start; + avoid_end = PAGE_ALIGN(initrd_end); +#endif + +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n", + kern_base, PAGE_ALIGN(kern_base + kern_size), + avoid_start, avoid_end); +#endif + for (i = 0; i < pavail_ents; i++) { + unsigned long start, end; + + start = pavail[i].phys_addr; + end = start + pavail[i].reg_size; + + while (start < end) { + if (start >= kern_base && + start < PAGE_ALIGN(kern_base + kern_size)) { + start = PAGE_ALIGN(kern_base + kern_size); + continue; + } + if (start >= avoid_start && start < avoid_end) { + start = avoid_end; + continue; + } + + if ((end - start) < bootmap_size) + break; + + if (start < kern_base && + (start + bootmap_size) > kern_base) { + start = PAGE_ALIGN(kern_base + kern_size); + continue; + } + + if (start < avoid_start && + (start + bootmap_size) > avoid_start) { + start = avoid_end; + continue; + } + + /* OK, it doesn't overlap anything, use it. */ +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n", + start >> PAGE_SHIFT, start); +#endif + return start >> PAGE_SHIFT; + } + } + + prom_printf("Cannot find free area for bootmap, aborting.\n"); + prom_halt(); +} + +static unsigned long __init bootmem_init(unsigned long *pages_avail, + unsigned long phys_base) { - unsigned long bootmap_size, start_pfn, end_pfn; + unsigned long bootmap_size, end_pfn; unsigned long end_of_phys_memory = 0UL; unsigned long bootmap_pfn, bytes_avail, size; int i; @@ -1236,14 +878,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) *pages_avail = bytes_avail >> PAGE_SHIFT; - /* Start with page aligned address of last symbol in kernel - * image. The kernel is hard mapped below PAGE_OFFSET in a - * 4MB locked TLB translation. - */ - start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT; - - bootmap_pfn = start_pfn; - end_pfn = end_of_phys_memory >> PAGE_SHIFT; #ifdef CONFIG_BLK_DEV_INITRD @@ -1260,23 +894,22 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) "(0x%016lx > 0x%016lx)\ndisabling initrd\n", initrd_end, end_of_phys_memory); initrd_start = 0; - } - if (initrd_start) { - if (initrd_start >= (start_pfn << PAGE_SHIFT) && - initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE) - bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT; + initrd_end = 0; } } #endif /* Initialize the boot-time allocator. */ max_pfn = max_low_pfn = end_pfn; - min_low_pfn = pfn_base; + min_low_pfn = (phys_base >> PAGE_SHIFT); + + bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn); #ifdef CONFIG_DEBUG_BOOTMEM prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n", min_low_pfn, bootmap_pfn, max_low_pfn); #endif - bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn); + bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, + min_low_pfn, end_pfn); /* Now register the available physical memory with the * allocator. @@ -1324,9 +957,26 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size); *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; + for (i = 0; i < pavail_ents; i++) { + unsigned long start_pfn, end_pfn; + + start_pfn = pavail[i].phys_addr >> PAGE_SHIFT; + end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT)); +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("memory_present(0, %lx, %lx)\n", + start_pfn, end_pfn); +#endif + memory_present(0, start_pfn, end_pfn); + } + + sparse_init(); + return end_pfn; } +static struct linux_prom64_registers pall[MAX_BANKS] __initdata; +static int pall_ents __initdata; + #ifdef CONFIG_DEBUG_PAGEALLOC static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot) { @@ -1382,14 +1032,44 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, return alloc_bytes; } -static struct linux_prom64_registers pall[MAX_BANKS] __initdata; -static int pall_ents __initdata; - extern unsigned int kvmap_linear_patch[1]; +#endif /* CONFIG_DEBUG_PAGEALLOC */ + +static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +{ + const unsigned long shift_256MB = 28; + const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL); + const unsigned long size_256MB = (1UL << shift_256MB); + + while (start < end) { + long remains; + + remains = end - start; + if (remains < size_256MB) + break; + + if (start & mask_256MB) { + start = (start + size_256MB) & ~mask_256MB; + continue; + } + + while (remains >= size_256MB) { + unsigned long index = start >> shift_256MB; + + __set_bit(index, kpte_linear_bitmap); + + start += size_256MB; + remains -= size_256MB; + } + } +} static void __init kernel_physical_mapping_init(void) { - unsigned long i, mem_alloced = 0UL; + unsigned long i; +#ifdef CONFIG_DEBUG_PAGEALLOC + unsigned long mem_alloced = 0UL; +#endif read_obp_memory("reg", &pall[0], &pall_ents); @@ -1398,10 +1078,16 @@ static void __init kernel_physical_mapping_init(void) phys_start = pall[i].phys_addr; phys_end = phys_start + pall[i].reg_size; + + mark_kpte_bitmap(phys_start, phys_end); + +#ifdef CONFIG_DEBUG_PAGEALLOC mem_alloced += kernel_map_range(phys_start, phys_end, PAGE_KERNEL); +#endif } +#ifdef CONFIG_DEBUG_PAGEALLOC printk("Allocated %ld bytes for kernel page tables.\n", mem_alloced); @@ -1409,8 +1095,10 @@ static void __init kernel_physical_mapping_init(void) flushi(&kvmap_linear_patch[0]); __flush_tlb_all(); +#endif } +#ifdef CONFIG_DEBUG_PAGEALLOC void kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT; @@ -1419,6 +1107,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable) kernel_map_range(phys_start, phys_end, (enable ? PAGE_KERNEL : __pgprot(0))); + flush_tsb_kernel_range(PAGE_OFFSET + phys_start, + PAGE_OFFSET + phys_end); + /* we should perform an IPI and flush all tlbs, * but that can deadlock->flush only current cpu. */ @@ -1439,18 +1130,150 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +static void __init tsb_phys_patch(void) +{ + struct tsb_ldquad_phys_patch_entry *pquad; + struct tsb_phys_patch_entry *p; + + pquad = &__tsb_ldquad_phys_patch; + while (pquad < &__tsb_ldquad_phys_patch_end) { + unsigned long addr = pquad->addr; + + if (tlb_type == hypervisor) + *(unsigned int *) addr = pquad->sun4v_insn; + else + *(unsigned int *) addr = pquad->sun4u_insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + pquad++; + } + + p = &__tsb_phys_patch; + while (p < &__tsb_phys_patch_end) { + unsigned long addr = p->addr; + + *(unsigned int *) addr = p->insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + p++; + } +} + +/* Don't mark as init, we give this to the Hypervisor. */ +static struct hv_tsb_descr ktsb_descr[2]; +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +static void __init sun4v_ktsb_init(void) +{ + unsigned long ktsb_pa; + + /* First KTSB for PAGE_SIZE mappings. */ + ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); + + switch (PAGE_SIZE) { + case 8 * 1024: + default: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K; + break; + + case 64 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K; + break; + + case 512 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K; + break; + + case 4 * 1024 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB; + break; + }; + + ktsb_descr[0].assoc = 1; + ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES; + ktsb_descr[0].ctx_idx = 0; + ktsb_descr[0].tsb_base = ktsb_pa; + ktsb_descr[0].resv = 0; + + /* Second KTSB for 4MB/256MB mappings. */ + ktsb_pa = (kern_base + + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); + + ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; + ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | + HV_PGSZ_MASK_256MB); + ktsb_descr[1].assoc = 1; + ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; + ktsb_descr[1].ctx_idx = 0; + ktsb_descr[1].tsb_base = ktsb_pa; + ktsb_descr[1].resv = 0; +} + +void __cpuinit sun4v_ktsb_register(void) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + unsigned long pa; + + pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE); + + func = HV_FAST_MMU_TSB_CTX0; + arg0 = 2; + arg1 = pa; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); +} + /* paging_init() sets up the page tables */ extern void cheetah_ecache_flush_init(void); +extern void sun4v_patch_tlb_handlers(void); static unsigned long last_valid_pfn; pgd_t swapper_pg_dir[2048]; +static void sun4u_pgprot_init(void); +static void sun4v_pgprot_init(void); + void __init paging_init(void) { - unsigned long end_pfn, pages_avail, shift; + unsigned long end_pfn, pages_avail, shift, phys_base; unsigned long real_end, i; + kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; + + /* Invalidate both kernel TSBs. */ + memset(swapper_tsb, 0x40, sizeof(swapper_tsb)); + memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); + + if (tlb_type == hypervisor) + sun4v_pgprot_init(); + else + sun4u_pgprot_init(); + + if (tlb_type == cheetah_plus || + tlb_type == hypervisor) + tsb_phys_patch(); + + if (tlb_type == hypervisor) { + sun4v_patch_tlb_handlers(); + sun4v_ktsb_init(); + } + /* Find available physical memory... */ read_obp_memory("available", &pavail[0], &pavail_ents); @@ -1458,11 +1281,6 @@ void __init paging_init(void) for (i = 0; i < pavail_ents; i++) phys_base = min(phys_base, pavail[i].phys_addr); - pfn_base = phys_base >> PAGE_SHIFT; - - kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; - kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; - set_bit(0, mmu_context_bmap); shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); @@ -1486,47 +1304,38 @@ void __init paging_init(void) pud_set(pud_offset(&swapper_pg_dir[0], 0), swapper_low_pmd_dir + (shift / sizeof(pgd_t))); - swapper_pgd_zero = pgd_val(swapper_pg_dir[0]); - inherit_prom_mappings(); - /* Ok, we can use our TLB miss and window trap handlers safely. - * We need to do a quick peek here to see if we are on StarFire - * or not, so setup_tba can setup the IRQ globals correctly (it - * needs to get the hard smp processor id correctly). - */ - { - extern void setup_tba(int); - setup_tba(this_is_starfire); - } - - inherit_locked_prom_mappings(1); + /* Ok, we can use our TLB miss and window trap handlers safely. */ + setup_tba(); __flush_tlb_all(); + if (tlb_type == hypervisor) + sun4v_ktsb_register(); + /* Setup bootmem... */ pages_avail = 0; - last_valid_pfn = end_pfn = bootmem_init(&pages_avail); + last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); + + max_mapnr = last_valid_pfn; -#ifdef CONFIG_DEBUG_PAGEALLOC kernel_physical_mapping_init(); -#endif { unsigned long zones_size[MAX_NR_ZONES]; unsigned long zholes_size[MAX_NR_ZONES]; - unsigned long npages; int znum; for (znum = 0; znum < MAX_NR_ZONES; znum++) zones_size[znum] = zholes_size[znum] = 0; - npages = end_pfn - pfn_base; - zones_size[ZONE_DMA] = npages; - zholes_size[ZONE_DMA] = npages - pages_avail; + zones_size[ZONE_DMA] = end_pfn; + zholes_size[ZONE_DMA] = end_pfn - pages_avail; free_area_init_node(0, &contig_page_data, zones_size, - phys_base >> PAGE_SHIFT, zholes_size); + __pa(PAGE_OFFSET) >> PAGE_SHIFT, + zholes_size); } device_scan(); @@ -1596,7 +1405,6 @@ void __init mem_init(void) taint_real_pages(); - max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(last_valid_pfn << PAGE_SHIFT); #ifdef CONFIG_DEBUG_BOOTMEM @@ -1676,3 +1484,342 @@ void free_initrd_mem(unsigned long start, unsigned long end) } } #endif + +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + +pgprot_t PAGE_KERNEL __read_mostly; +EXPORT_SYMBOL(PAGE_KERNEL); + +pgprot_t PAGE_KERNEL_LOCKED __read_mostly; +pgprot_t PAGE_COPY __read_mostly; + +pgprot_t PAGE_SHARED __read_mostly; +EXPORT_SYMBOL(PAGE_SHARED); + +pgprot_t PAGE_EXEC __read_mostly; +unsigned long pg_iobits __read_mostly; + +unsigned long _PAGE_IE __read_mostly; + +unsigned long _PAGE_E __read_mostly; +EXPORT_SYMBOL(_PAGE_E); + +unsigned long _PAGE_CACHE __read_mostly; +EXPORT_SYMBOL(_PAGE_CACHE); + +static void prot_init_common(unsigned long page_none, + unsigned long page_shared, + unsigned long page_copy, + unsigned long page_readonly, + unsigned long page_exec_bit) +{ + PAGE_COPY = __pgprot(page_copy); + PAGE_SHARED = __pgprot(page_shared); + + protection_map[0x0] = __pgprot(page_none); + protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x4] = __pgprot(page_readonly); + protection_map[0x5] = __pgprot(page_readonly); + protection_map[0x6] = __pgprot(page_copy); + protection_map[0x7] = __pgprot(page_copy); + protection_map[0x8] = __pgprot(page_none); + protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xc] = __pgprot(page_readonly); + protection_map[0xd] = __pgprot(page_readonly); + protection_map[0xe] = __pgprot(page_shared); + protection_map[0xf] = __pgprot(page_shared); +} + +static void __init sun4u_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U); + PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U | _PAGE_L_4U); + PAGE_EXEC = __pgprot(_PAGE_EXEC_4U); + + _PAGE_IE = _PAGE_IE_4U; + _PAGE_E = _PAGE_E_4U; + _PAGE_CACHE = _PAGE_CACHE_4U; + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U | + __ACCESS_BITS_4U | _PAGE_E_4U); + + kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ + 0xfffff80000000000; + kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | + _PAGE_P_4U | _PAGE_W_4U); + + /* XXX Should use 256MB on Panther. XXX */ + kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + + _PAGE_SZBITS = _PAGE_SZBITS_4U; + _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | + _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | + _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); + + + page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + + page_exec_bit = _PAGE_EXEC_4U; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +static void __init sun4v_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | + _PAGE_CACHE_4V | _PAGE_P_4V | + __ACCESS_BITS_4V | __DIRTY_BITS_4V | + _PAGE_EXEC_4V); + PAGE_KERNEL_LOCKED = PAGE_KERNEL; + PAGE_EXEC = __pgprot(_PAGE_EXEC_4V); + + _PAGE_IE = _PAGE_IE_4V; + _PAGE_E = _PAGE_E_4V; + _PAGE_CACHE = _PAGE_CACHE_4V; + + kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ + 0xfffff80000000000; + kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ + 0xfffff80000000000; + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | + __ACCESS_BITS_4V | _PAGE_E_4V); + + _PAGE_SZBITS = _PAGE_SZBITS_4V; + _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | + _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | + _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | + _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); + + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + + page_exec_bit = _PAGE_EXEC_4V; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +unsigned long pte_sz_bits(unsigned long sz) +{ + if (tlb_type == hypervisor) { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4V; + case 64 * 1024: + return _PAGE_SZ64K_4V; + case 512 * 1024: + return _PAGE_SZ512K_4V; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4V; + }; + } else { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4U; + case 64 * 1024: + return _PAGE_SZ64K_4U; + case 512 * 1024: + return _PAGE_SZ512K_4U; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4U; + }; + } +} + +pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size) +{ + pte_t pte; + + pte_val(pte) = page | pgprot_val(pgprot_noncached(prot)); + pte_val(pte) |= (((unsigned long)space) << 32); + pte_val(pte) |= pte_sz_bits(page_size); + + return pte; +} + +static unsigned long kern_large_tte(unsigned long paddr) +{ + unsigned long val; + + val = (_PAGE_VALID | _PAGE_SZ4MB_4U | + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | + _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); + if (tlb_type == hypervisor) + val = (_PAGE_VALID | _PAGE_SZ4MB_4V | + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + _PAGE_EXEC_4V | _PAGE_W_4V); + + return val | paddr; +} + +/* + * Translate PROM's mapping we capture at boot time into physical address. + * The second parameter is only set from prom_callback() invocations. + */ +unsigned long prom_virt_to_phys(unsigned long promva, int *error) +{ + unsigned long mask; + int i; + + mask = _PAGE_PADDR_4U; + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + for (i = 0; i < prom_trans_ents; i++) { + struct linux_prom_translation *p = &prom_trans[i]; + + if (promva >= p->virt && + promva < (p->virt + p->size)) { + unsigned long base = p->data & mask; + + if (error) + *error = 0; + return base + (promva & (8192 - 1)); + } + } + if (error) + *error = 1; + return 0UL; +} + +/* XXX We should kill off this ugly thing at so me point. XXX */ +unsigned long sun4u_get_pte(unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long mask = _PAGE_PADDR_4U; + + if (tlb_type == hypervisor) + mask = _PAGE_PADDR_4V; + + if (addr >= PAGE_OFFSET) + return addr & mask; + + if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS)) + return prom_virt_to_phys(addr, NULL); + + pgdp = pgd_offset_k(addr); + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + ptep = pte_offset_kernel(pmdp, addr); + + return pte_val(*ptep) & mask; +} + +/* If not locked, zap it. */ +void __flush_tlb_all(void) +{ + unsigned long pstate; + int i; + + __asm__ __volatile__("flushw\n\t" + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + if (tlb_type == spitfire) { + for (i = 0; i < 64; i++) { + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); + spitfire_put_dtlb_data(i, 0x0UL); + } + + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); + spitfire_put_itlb_data(i, 0x0UL); + } + } + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { + cheetah_flush_dtlb_all(); + cheetah_flush_itlb_all(); + } + __asm__ __volatile__("wrpr %0, 0, %%pstate" + : : "r" (pstate)); +} + +#ifdef CONFIG_MEMORY_HOTPLUG + +void online_page(struct page *page) +{ + ClearPageReserved(page); + set_page_count(page, 0); + free_cold_page(page); + totalram_pages++; + num_physpages++; +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c index 8b104be..a079cf4 100644 --- a/arch/sparc64/mm/tlb.c +++ b/arch/sparc64/mm/tlb.c @@ -25,6 +25,8 @@ void flush_tlb_pending(void) struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); if (mp->tlb_nr) { + flush_tsb_user(mp); + if (CTX_VALID(mp->mm->context)) { #ifdef CONFIG_SMP smp_flush_tlb_pending(mp->mm, mp->tlb_nr, @@ -47,7 +49,8 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t if (pte_exec(orig)) vaddr |= 0x1UL; - if (pte_dirty(orig)) { + if (tlb_type != hypervisor && + pte_dirty(orig)) { unsigned long paddr, pfn = pte_pfn(orig); struct address_space *mapping; struct page *page; @@ -89,62 +92,3 @@ no_cache_flush: if (nr >= TLB_BATCH_NR) flush_tlb_pending(); } - -void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) -{ - struct mmu_gather *mp = &__get_cpu_var(mmu_gathers); - unsigned long nr = mp->tlb_nr; - long s = start, e = end, vpte_base; - - if (mp->fullmm) - return; - - /* If start is greater than end, that is a real problem. */ - BUG_ON(start > end); - - /* However, straddling the VA space hole is quite normal. */ - s &= PMD_MASK; - e = (e + PMD_SIZE - 1) & PMD_MASK; - - vpte_base = (tlb_type == spitfire ? - VPTE_BASE_SPITFIRE : - VPTE_BASE_CHEETAH); - - if (unlikely(nr != 0 && mm != mp->mm)) { - flush_tlb_pending(); - nr = 0; - } - - if (nr == 0) - mp->mm = mm; - - start = vpte_base + (s >> (PAGE_SHIFT - 3)); - end = vpte_base + (e >> (PAGE_SHIFT - 3)); - - /* If the request straddles the VA space hole, we - * need to swap start and end. The reason this - * occurs is that "vpte_base" is the center of - * the linear page table mapping area. Thus, - * high addresses with the sign bit set map to - * addresses below vpte_base and non-sign bit - * addresses map to addresses above vpte_base. - */ - if (end < start) { - unsigned long tmp = start; - - start = end; - end = tmp; - } - - while (start < end) { - mp->vaddrs[nr] = start; - mp->tlb_nr = ++nr; - if (nr >= TLB_BATCH_NR) { - flush_tlb_pending(); - nr = 0; - } - start += PAGE_SIZE; - } - if (nr) - flush_tlb_pending(); -} diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c new file mode 100644 index 0000000..b2064e2 --- /dev/null +++ b/arch/sparc64/mm/tsb.c @@ -0,0 +1,440 @@ +/* arch/sparc64/mm/tsb.c + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <asm/system.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tsb.h> +#include <asm/oplib.h> + +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries) +{ + vaddr >>= PAGE_SHIFT; + return vaddr & (nentries - 1); +} + +static inline int tag_compare(unsigned long tag, unsigned long vaddr) +{ + return (tag == (vaddr >> 22)); +} + +/* TSB flushes need only occur on the processor initiating the address + * space modification, not on each cpu the address space has run on. + * Only the TLB flush needs that treatment. + */ + +void flush_tsb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long v; + + for (v = start; v < end; v += PAGE_SIZE) { + unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES); + struct tsb *ent = &swapper_tsb[hash]; + + if (tag_compare(ent->tag, v)) { + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + membar_storeload_storestore(); + } + } +} + +void flush_tsb_user(struct mmu_gather *mp) +{ + struct mm_struct *mm = mp->mm; + unsigned long nentries, base, flags; + struct tsb *tsb; + int i; + + spin_lock_irqsave(&mm->context.lock, flags); + + tsb = mm->context.tsb; + nentries = mm->context.tsb_nentries; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(tsb); + else + base = (unsigned long) tsb; + + for (i = 0; i < mp->tlb_nr; i++) { + unsigned long v = mp->vaddrs[i]; + unsigned long tag, ent, hash; + + v &= ~0x1UL; + + hash = tsb_hash(v, nentries); + ent = base + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); + + tsb_flush(ent, tag); + } + + spin_unlock_irqrestore(&mm->context.lock, flags); +} + +static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) +{ + unsigned long tsb_reg, base, tsb_paddr; + unsigned long page_sz, tte; + + mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb); + + base = TSBMAP_BASE; + tte = pgprot_val(PAGE_KERNEL_LOCKED); + tsb_paddr = __pa(mm->context.tsb); + BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); + + /* Use the smallest page size that can map the whole TSB + * in one TLB entry. + */ + switch (tsb_bytes) { + case 8192 << 0: + tsb_reg = 0x0UL; +#ifdef DCACHE_ALIASING_POSSIBLE + base += (tsb_paddr & 8192); +#endif + page_sz = 8192; + break; + + case 8192 << 1: + tsb_reg = 0x1UL; + page_sz = 64 * 1024; + break; + + case 8192 << 2: + tsb_reg = 0x2UL; + page_sz = 64 * 1024; + break; + + case 8192 << 3: + tsb_reg = 0x3UL; + page_sz = 64 * 1024; + break; + + case 8192 << 4: + tsb_reg = 0x4UL; + page_sz = 512 * 1024; + break; + + case 8192 << 5: + tsb_reg = 0x5UL; + page_sz = 512 * 1024; + break; + + case 8192 << 6: + tsb_reg = 0x6UL; + page_sz = 512 * 1024; + break; + + case 8192 << 7: + tsb_reg = 0x7UL; + page_sz = 4 * 1024 * 1024; + break; + + default: + BUG(); + }; + tte |= pte_sz_bits(page_sz); + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + /* Physical mapping, no locked TLB entry for TSB. */ + tsb_reg |= tsb_paddr; + + mm->context.tsb_reg_val = tsb_reg; + mm->context.tsb_map_vaddr = 0; + mm->context.tsb_map_pte = 0; + } else { + tsb_reg |= base; + tsb_reg |= (tsb_paddr & (page_sz - 1UL)); + tte |= (tsb_paddr & ~(page_sz - 1UL)); + + mm->context.tsb_reg_val = tsb_reg; + mm->context.tsb_map_vaddr = base; + mm->context.tsb_map_pte = tte; + } + + /* Setup the Hypervisor TSB descriptor. */ + if (tlb_type == hypervisor) { + struct hv_tsb_descr *hp = &mm->context.tsb_descr; + + switch (PAGE_SIZE) { + case 8192: + default: + hp->pgsz_idx = HV_PGSZ_IDX_8K; + break; + + case 64 * 1024: + hp->pgsz_idx = HV_PGSZ_IDX_64K; + break; + + case 512 * 1024: + hp->pgsz_idx = HV_PGSZ_IDX_512K; + break; + + case 4 * 1024 * 1024: + hp->pgsz_idx = HV_PGSZ_IDX_4MB; + break; + }; + hp->assoc = 1; + hp->num_ttes = tsb_bytes / 16; + hp->ctx_idx = 0; + switch (PAGE_SIZE) { + case 8192: + default: + hp->pgsz_mask = HV_PGSZ_MASK_8K; + break; + + case 64 * 1024: + hp->pgsz_mask = HV_PGSZ_MASK_64K; + break; + + case 512 * 1024: + hp->pgsz_mask = HV_PGSZ_MASK_512K; + break; + + case 4 * 1024 * 1024: + hp->pgsz_mask = HV_PGSZ_MASK_4MB; + break; + }; + hp->tsb_base = tsb_paddr; + hp->resv = 0; + } +} + +static kmem_cache_t *tsb_caches[8] __read_mostly; + +static const char *tsb_cache_names[8] = { + "tsb_8KB", + "tsb_16KB", + "tsb_32KB", + "tsb_64KB", + "tsb_128KB", + "tsb_256KB", + "tsb_512KB", + "tsb_1MB", +}; + +void __init tsb_cache_init(void) +{ + unsigned long i; + + for (i = 0; i < 8; i++) { + unsigned long size = 8192 << i; + const char *name = tsb_cache_names[i]; + + tsb_caches[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN | + SLAB_MUST_HWCACHE_ALIGN, + NULL, NULL); + if (!tsb_caches[i]) { + prom_printf("Could not create %s cache\n", name); + prom_halt(); + } + } +} + +/* When the RSS of an address space exceeds mm->context.tsb_rss_limit, + * do_sparc64_fault() invokes this routine to try and grow the TSB. + * + * When we reach the maximum TSB size supported, we stick ~0UL into + * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache() + * will not trigger any longer. + * + * The TSB can be anywhere from 8K to 1MB in size, in increasing powers + * of two. The TSB must be aligned to it's size, so f.e. a 512K TSB + * must be 512K aligned. It also must be physically contiguous, so we + * cannot use vmalloc(). + * + * The idea here is to grow the TSB when the RSS of the process approaches + * the number of entries that the current TSB can hold at once. Currently, + * we trigger when the RSS hits 3/4 of the TSB capacity. + */ +void tsb_grow(struct mm_struct *mm, unsigned long rss) +{ + unsigned long max_tsb_size = 1 * 1024 * 1024; + unsigned long new_size, old_size, flags; + struct tsb *old_tsb, *new_tsb; + unsigned long new_cache_index, old_cache_index; + unsigned long new_rss_limit; + gfp_t gfp_flags; + + if (max_tsb_size > (PAGE_SIZE << MAX_ORDER)) + max_tsb_size = (PAGE_SIZE << MAX_ORDER); + + new_cache_index = 0; + for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) { + unsigned long n_entries = new_size / sizeof(struct tsb); + + n_entries = (n_entries * 3) / 4; + if (n_entries > rss) + break; + + new_cache_index++; + } + + if (new_size == max_tsb_size) + new_rss_limit = ~0UL; + else + new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4; + +retry_tsb_alloc: + gfp_flags = GFP_KERNEL; + if (new_size > (PAGE_SIZE * 2)) + gfp_flags = __GFP_NOWARN | __GFP_NORETRY; + + new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags); + if (unlikely(!new_tsb)) { + /* Not being able to fork due to a high-order TSB + * allocation failure is very bad behavior. Just back + * down to a 0-order allocation and force no TSB + * growing for this address space. + */ + if (mm->context.tsb == NULL && new_cache_index > 0) { + new_cache_index = 0; + new_size = 8192; + new_rss_limit = ~0UL; + goto retry_tsb_alloc; + } + + /* If we failed on a TSB grow, we are under serious + * memory pressure so don't try to grow any more. + */ + if (mm->context.tsb != NULL) + mm->context.tsb_rss_limit = ~0UL; + return; + } + + /* Mark all tags as invalid. */ + tsb_init(new_tsb, new_size); + + /* Ok, we are about to commit the changes. If we are + * growing an existing TSB the locking is very tricky, + * so WATCH OUT! + * + * We have to hold mm->context.lock while committing to the + * new TSB, this synchronizes us with processors in + * flush_tsb_user() and switch_mm() for this address space. + * + * But even with that lock held, processors run asynchronously + * accessing the old TSB via TLB miss handling. This is OK + * because those actions are just propagating state from the + * Linux page tables into the TSB, page table mappings are not + * being changed. If a real fault occurs, the processor will + * synchronize with us when it hits flush_tsb_user(), this is + * also true for the case where vmscan is modifying the page + * tables. The only thing we need to be careful with is to + * skip any locked TSB entries during copy_tsb(). + * + * When we finish committing to the new TSB, we have to drop + * the lock and ask all other cpus running this address space + * to run tsb_context_switch() to see the new TSB table. + */ + spin_lock_irqsave(&mm->context.lock, flags); + + old_tsb = mm->context.tsb; + old_cache_index = (mm->context.tsb_reg_val & 0x7UL); + old_size = mm->context.tsb_nentries * sizeof(struct tsb); + + + /* Handle multiple threads trying to grow the TSB at the same time. + * One will get in here first, and bump the size and the RSS limit. + * The others will get in here next and hit this check. + */ + if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) { + spin_unlock_irqrestore(&mm->context.lock, flags); + + kmem_cache_free(tsb_caches[new_cache_index], new_tsb); + return; + } + + mm->context.tsb_rss_limit = new_rss_limit; + + if (old_tsb) { + extern void copy_tsb(unsigned long old_tsb_base, + unsigned long old_tsb_size, + unsigned long new_tsb_base, + unsigned long new_tsb_size); + unsigned long old_tsb_base = (unsigned long) old_tsb; + unsigned long new_tsb_base = (unsigned long) new_tsb; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + old_tsb_base = __pa(old_tsb_base); + new_tsb_base = __pa(new_tsb_base); + } + copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); + } + + mm->context.tsb = new_tsb; + setup_tsb_params(mm, new_size); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + /* If old_tsb is NULL, we're being invoked for the first time + * from init_new_context(). + */ + if (old_tsb) { + /* Reload it on the local cpu. */ + tsb_context_switch(mm); + + /* Now force other processors to do the same. */ + smp_tsb_sync(mm); + + /* Now it is safe to free the old tsb. */ + kmem_cache_free(tsb_caches[old_cache_index], old_tsb); + } +} + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + spin_lock_init(&mm->context.lock); + + mm->context.sparc64_ctx_val = 0UL; + + /* copy_mm() copies over the parent's mm_struct before calling + * us, so we need to zero out the TSB pointer or else tsb_grow() + * will be confused and think there is an older TSB to free up. + */ + mm->context.tsb = NULL; + + /* If this is fork, inherit the parent's TSB size. We would + * grow it to that size on the first page fault anyways. + */ + tsb_grow(mm, get_mm_rss(mm)); + + if (unlikely(!mm->context.tsb)) + return -ENOMEM; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + unsigned long flags, cache_index; + + cache_index = (mm->context.tsb_reg_val & 0x7UL); + kmem_cache_free(tsb_caches[cache_index], mm->context.tsb); + + /* We can remove these later, but for now it's useful + * to catch any bogus post-destroy_context() references + * to the TSB. + */ + mm->context.tsb = NULL; + mm->context.tsb_reg_val = 0UL; + + spin_lock_irqsave(&ctx_alloc_lock, flags); + + if (CTX_VALID(mm->context)) { + unsigned long nr = CTX_NRBITS(mm->context); + mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); + } + + spin_unlock_irqrestore(&ctx_alloc_lock, flags); +} diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index e4c9151..f8479fa 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -15,6 +15,7 @@ #include <asm/head.h> #include <asm/thread_info.h> #include <asm/cacheflush.h> +#include <asm/hypervisor.h> /* Basically, most of the Spitfire vs. Cheetah madness * has to do with the fact that Cheetah does not support @@ -29,16 +30,18 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ +__flush_tlb_mm: /* 18 insns */ + /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 bne,pn %icc, __spitfire_flush_tlb_mm_slow mov 0x50, %g3 stxa %g0, [%g3] ASI_DMMU_DEMAP stxa %g0, [%g3] ASI_IMMU_DEMAP + sethi %hi(KERNBASE), %g3 + flush %g3 retl - flush %g6 - nop + nop nop nop nop @@ -51,7 +54,7 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: +__flush_tlb_pending: /* 26 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -72,7 +75,8 @@ __flush_tlb_pending: brnz,pt %o1, 1b nop stxa %g2, [%o4] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o4 + flush %o4 retl wrpr %g7, 0x0, %pstate nop @@ -82,7 +86,8 @@ __flush_tlb_pending: .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* %o0=start, %o1=end */ +__flush_tlb_kernel_range: /* 16 insns */ + /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f sethi %hi(PAGE_SIZE), %o4 @@ -94,8 +99,11 @@ __flush_tlb_kernel_range: /* %o0=start, %o1=end */ membar #Sync brnz,pt %o3, 1b sub %o3, %o4, %o3 -2: retl - flush %g6 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -105,7 +113,8 @@ __spitfire_flush_tlb_mm_slow: stxa %g0, [%g3] ASI_IMMU_DEMAP flush %g6 stxa %g2, [%o1] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o1 + flush %o1 retl wrpr %g1, 0, %pstate @@ -181,7 +190,7 @@ __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ .previous /* Cheetah specific versions, patched at boot time. */ -__cheetah_flush_tlb_mm: /* 18 insns */ +__cheetah_flush_tlb_mm: /* 19 insns */ rdpr %pstate, %g7 andn %g7, PSTATE_IE, %g2 wrpr %g2, 0x0, %pstate @@ -196,12 +205,13 @@ __cheetah_flush_tlb_mm: /* 18 insns */ stxa %g0, [%g3] ASI_DMMU_DEMAP stxa %g0, [%g3] ASI_IMMU_DEMAP stxa %g2, [%o2] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o2 + flush %o2 wrpr %g0, 0, %tl retl wrpr %g7, 0x0, %pstate -__cheetah_flush_tlb_pending: /* 26 insns */ +__cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -225,7 +235,8 @@ __cheetah_flush_tlb_pending: /* 26 insns */ brnz,pt %o1, 1b nop stxa %g2, [%o4] ASI_DMMU - flush %g6 + sethi %hi(KERNBASE), %o4 + flush %o4 wrpr %g0, 0, %tl retl wrpr %g7, 0x0, %pstate @@ -245,7 +256,76 @@ __cheetah_flush_dcache_page: /* 11 insns */ nop #endif /* DCACHE_ALIASING_POSSIBLE */ -cheetah_patch_one: + /* Hypervisor specific versions, patched at boot time. */ +__hypervisor_tlb_tl0_error: + save %sp, -192, %sp + mov %i0, %o0 + call hypervisor_tlbop_error + mov %i1, %o1 + ret + restore + +__hypervisor_flush_tlb_mm: /* 10 insns */ + mov %o0, %o2 /* ARG2: mmu context */ + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop + +__hypervisor_flush_tlb_pending: /* 16 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + sllx %o1, 3, %g1 + mov %o2, %g2 + mov %o0, %g3 +1: sub %g1, (1 << 3), %g1 + ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g3, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g1, 1b + nop + retl + nop + +__hypervisor_flush_tlb_kernel_range: /* 16 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sethi %hi(PAGE_SIZE), %g3 + mov %o0, %g1 + sub %o1, %g1, %g2 + sub %g2, %g3, %g2 +1: add %g1, %g2, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g2, 1b + sub %g2, %g3, %g2 +2: retl + nop + +#ifdef DCACHE_ALIASING_POSSIBLE + /* XXX Niagara and friends have an 8K cache, so no aliasing is + * XXX possible, but nothing explicit in the Hypervisor API + * XXX guarantees this. + */ +__hypervisor_flush_dcache_page: /* 2 insns */ + retl + nop +#endif + +tlb_patch_one: 1: lduw [%o1], %g1 stw %g1, [%o0] flush %o0 @@ -264,22 +344,22 @@ cheetah_patch_cachetlbops: or %o0, %lo(__flush_tlb_mm), %o0 sethi %hi(__cheetah_flush_tlb_mm), %o1 or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call cheetah_patch_one - mov 18, %o2 + call tlb_patch_one + mov 19, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call cheetah_patch_one - mov 26, %o2 + call tlb_patch_one + mov 27, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 or %o0, %lo(__flush_dcache_page), %o0 sethi %hi(__cheetah_flush_dcache_page), %o1 or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call cheetah_patch_one + call tlb_patch_one mov 11, %o2 #endif /* DCACHE_ALIASING_POSSIBLE */ @@ -295,16 +375,14 @@ cheetah_patch_cachetlbops: * %g1 address arg 1 (tlb page and range flushes) * %g7 address arg 2 (tlb range flush only) * - * %g6 ivector table, don't touch - * %g2 scratch 1 - * %g3 scratch 2 - * %g4 scratch 3 - * - * TODO: Make xcall TLB range flushes use the tricks above... -DaveM + * %g6 scratch 1 + * %g2 scratch 2 + * %g3 scratch 3 + * %g4 scratch 4 */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: +xcall_flush_tlb_mm: /* 21 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -316,9 +394,19 @@ xcall_flush_tlb_mm: stxa %g0, [%g4] ASI_IMMU_DEMAP stxa %g3, [%g2] ASI_DMMU retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: +xcall_flush_tlb_pending: /* 21 insns */ /* %g5=context, %g1=nr, %g7=vaddrs[] */ sllx %g1, 3, %g1 mov PRIMARY_CONTEXT, %g4 @@ -341,9 +429,10 @@ xcall_flush_tlb_pending: nop stxa %g2, [%g4] ASI_DMMU retry + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: +xcall_flush_tlb_kernel_range: /* 25 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 @@ -360,14 +449,30 @@ xcall_flush_tlb_kernel_range: retry nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop /* This runs in a very controlled environment, so we do * not need to worry about BH races etc. */ .globl xcall_sync_tick xcall_sync_tick: - rdpr %pstate, %g2 + +661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %pil, %g2 wrpr %g0, 15, %pil sethi %hi(109f), %g7 @@ -390,8 +495,15 @@ xcall_sync_tick: */ .globl xcall_report_regs xcall_report_regs: - rdpr %pstate, %g2 + +661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %pil, %g2 wrpr %g0, 15, %pil sethi %hi(109f), %g7 @@ -453,62 +565,96 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address nop nop - .data - -errata32_hwbug: - .xword 0 - - .text - - /* These two are not performance critical... */ - .globl xcall_flush_tlb_all_spitfire -xcall_flush_tlb_all_spitfire: - /* Spitfire Errata #32 workaround. */ - sethi %hi(errata32_hwbug), %g4 - stx %g0, [%g4 + %lo(errata32_hwbug)] - - clr %g2 - clr %g3 -1: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4 - and %g4, _PAGE_L, %g5 - brnz,pn %g5, 2f - mov TLB_TAG_ACCESS, %g7 - - stxa %g0, [%g7] ASI_DMMU - membar #Sync - stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS + /* %g5: error + * %g6: tlb op + */ +__hypervisor_tlb_xcall_error: + mov %g5, %g4 + mov %g6, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o0 + call hypervisor_tlbop_error_xcall + mov %l5, %o1 + ba,a,pt %xcc, rtrap_clr_l6 + + .globl __hypervisor_xcall_flush_tlb_mm +__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ + /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ + mov %o0, %g2 + mov %o1, %g3 + mov %o2, %g4 + mov %o3, %g1 + mov %o5, %g7 + clr %o0 /* ARG0: CPU lists unimplemented */ + clr %o1 /* ARG1: CPU lists unimplemented */ + mov %g5, %o2 /* ARG2: mmu context */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov HV_FAST_MMU_DEMAP_CTX, %g6 + brnz,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 + mov %g1, %o3 + mov %g7, %o5 membar #Sync + retry - /* Spitfire Errata #32 workaround. */ - sethi %hi(errata32_hwbug), %g4 - stx %g0, [%g4 + %lo(errata32_hwbug)] - -2: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4 - and %g4, _PAGE_L, %g5 - brnz,pn %g5, 2f - mov TLB_TAG_ACCESS, %g7 - - stxa %g0, [%g7] ASI_IMMU - membar #Sync - stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS + .globl __hypervisor_xcall_flush_tlb_pending +__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ + /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ + sllx %g1, 3, %g1 + mov %o0, %g2 + mov %o1, %g3 + mov %o2, %g4 +1: sub %g1, (1 << 3), %g1 + ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g5, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 + brnz,a,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + brnz,pt %g1, 1b + nop + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 membar #Sync - - /* Spitfire Errata #32 workaround. */ - sethi %hi(errata32_hwbug), %g4 - stx %g0, [%g4 + %lo(errata32_hwbug)] - -2: add %g2, 1, %g2 - cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT - ble,pt %icc, 1b - sll %g2, 3, %g3 - flush %g6 retry - .globl xcall_flush_tlb_all_cheetah -xcall_flush_tlb_all_cheetah: - mov 0x80, %g2 - stxa %g0, [%g2] ASI_DMMU_DEMAP - stxa %g0, [%g2] ASI_IMMU_DEMAP + .globl __hypervisor_xcall_flush_tlb_kernel_range +__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ + /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + add %g2, 1, %g2 + sub %g3, %g2, %g3 + mov %o0, %g2 + mov %o1, %g4 + mov %o2, %g7 +1: add %g1, %g3, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 + brnz,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + sethi %hi(PAGE_SIZE), %o2 + brnz,pt %g3, 1b + sub %g3, %o2, %g3 + mov %g2, %o0 + mov %g4, %o1 + mov %g7, %o2 + membar #Sync retry /* These just get rescheduled to PIL vectors. */ @@ -527,4 +673,70 @@ xcall_capture: wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint retry + .globl xcall_new_mmu_context_version +xcall_new_mmu_context_version: + wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint + retry + #endif /* CONFIG_SMP */ + + + .globl hypervisor_patch_cachetlbops +hypervisor_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__hypervisor_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 + call tlb_patch_one + mov 10, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__hypervisor_flush_tlb_pending), %o1 + or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 + call tlb_patch_one + mov 16, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 16, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__hypervisor_flush_dcache_page), %o1 + or %o1, %lo(__hypervisor_flush_dcache_page), %o1 + call tlb_patch_one + mov 2, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_mm), %o0 + or %o0, %lo(xcall_flush_tlb_mm), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 + call tlb_patch_one + mov 21, %o2 + + sethi %hi(xcall_flush_tlb_pending), %o0 + or %o0, %lo(xcall_flush_tlb_pending), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + call tlb_patch_one + mov 21, %o2 + + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 25, %o2 +#endif /* CONFIG_SMP */ + + ret + restore diff --git a/arch/sparc64/prom/cif.S b/arch/sparc64/prom/cif.S index 29d0ae7..5f27ad7 100644 --- a/arch/sparc64/prom/cif.S +++ b/arch/sparc64/prom/cif.S @@ -1,10 +1,12 @@ /* cif.S: PROM entry/exit assembler trampolines. * - * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 2005 David S. Miller <davem@davemloft.net> + * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 2005, 2006 David S. Miller <davem@davemloft.net> */ #include <asm/pstate.h> +#include <asm/cpudata.h> +#include <asm/thread_info.h> .text .globl prom_cif_interface @@ -12,78 +14,16 @@ prom_cif_interface: sethi %hi(p1275buf), %o0 or %o0, %lo(p1275buf), %o0 ldx [%o0 + 0x010], %o1 ! prom_cif_stack - save %o1, -0x190, %sp + save %o1, -192, %sp ldx [%i0 + 0x008], %l2 ! prom_cif_handler - rdpr %pstate, %l4 - wrpr %g0, 0x15, %pstate ! save alternate globals - stx %g1, [%sp + 2047 + 0x0b0] - stx %g2, [%sp + 2047 + 0x0b8] - stx %g3, [%sp + 2047 + 0x0c0] - stx %g4, [%sp + 2047 + 0x0c8] - stx %g5, [%sp + 2047 + 0x0d0] - stx %g6, [%sp + 2047 + 0x0d8] - stx %g7, [%sp + 2047 + 0x0e0] - wrpr %g0, 0x814, %pstate ! save interrupt globals - stx %g1, [%sp + 2047 + 0x0e8] - stx %g2, [%sp + 2047 + 0x0f0] - stx %g3, [%sp + 2047 + 0x0f8] - stx %g4, [%sp + 2047 + 0x100] - stx %g5, [%sp + 2047 + 0x108] - stx %g6, [%sp + 2047 + 0x110] - stx %g7, [%sp + 2047 + 0x118] - wrpr %g0, 0x14, %pstate ! save normal globals - stx %g1, [%sp + 2047 + 0x120] - stx %g2, [%sp + 2047 + 0x128] - stx %g3, [%sp + 2047 + 0x130] - stx %g4, [%sp + 2047 + 0x138] - stx %g5, [%sp + 2047 + 0x140] - stx %g6, [%sp + 2047 + 0x148] - stx %g7, [%sp + 2047 + 0x150] - wrpr %g0, 0x414, %pstate ! save mmu globals - stx %g1, [%sp + 2047 + 0x158] - stx %g2, [%sp + 2047 + 0x160] - stx %g3, [%sp + 2047 + 0x168] - stx %g4, [%sp + 2047 + 0x170] - stx %g5, [%sp + 2047 + 0x178] - stx %g6, [%sp + 2047 + 0x180] - stx %g7, [%sp + 2047 + 0x188] - mov %g1, %l0 ! also save to locals, so we can handle - mov %g2, %l1 ! tlb faults later on, when accessing - mov %g3, %l3 ! the stack. - mov %g7, %l5 - wrpr %l4, PSTATE_IE, %pstate ! turn off interrupts + mov %g4, %l0 + mov %g5, %l1 + mov %g6, %l3 call %l2 add %i0, 0x018, %o0 ! prom_args - wrpr %g0, 0x414, %pstate ! restore mmu globals - mov %l0, %g1 - mov %l1, %g2 - mov %l3, %g3 - mov %l5, %g7 - wrpr %g0, 0x14, %pstate ! restore normal globals - ldx [%sp + 2047 + 0x120], %g1 - ldx [%sp + 2047 + 0x128], %g2 - ldx [%sp + 2047 + 0x130], %g3 - ldx [%sp + 2047 + 0x138], %g4 - ldx [%sp + 2047 + 0x140], %g5 - ldx [%sp + 2047 + 0x148], %g6 - ldx [%sp + 2047 + 0x150], %g7 - wrpr %g0, 0x814, %pstate ! restore interrupt globals - ldx [%sp + 2047 + 0x0e8], %g1 - ldx [%sp + 2047 + 0x0f0], %g2 - ldx [%sp + 2047 + 0x0f8], %g3 - ldx [%sp + 2047 + 0x100], %g4 - ldx [%sp + 2047 + 0x108], %g5 - ldx [%sp + 2047 + 0x110], %g6 - ldx [%sp + 2047 + 0x118], %g7 - wrpr %g0, 0x15, %pstate ! restore alternate globals - ldx [%sp + 2047 + 0x0b0], %g1 - ldx [%sp + 2047 + 0x0b8], %g2 - ldx [%sp + 2047 + 0x0c0], %g3 - ldx [%sp + 2047 + 0x0c8], %g4 - ldx [%sp + 2047 + 0x0d0], %g5 - ldx [%sp + 2047 + 0x0d8], %g6 - ldx [%sp + 2047 + 0x0e0], %g7 - wrpr %l4, 0, %pstate ! restore original pstate + mov %l0, %g4 + mov %l1, %g5 + mov %l3, %g6 ret restore @@ -91,135 +31,18 @@ prom_cif_interface: prom_cif_callback: sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - save %sp, -0x270, %sp - rdpr %pstate, %l4 - wrpr %g0, 0x15, %pstate ! save PROM alternate globals - stx %g1, [%sp + 2047 + 0x0b0] - stx %g2, [%sp + 2047 + 0x0b8] - stx %g3, [%sp + 2047 + 0x0c0] - stx %g4, [%sp + 2047 + 0x0c8] - stx %g5, [%sp + 2047 + 0x0d0] - stx %g6, [%sp + 2047 + 0x0d8] - stx %g7, [%sp + 2047 + 0x0e0] - ! restore Linux alternate globals - ldx [%sp + 2047 + 0x190], %g1 - ldx [%sp + 2047 + 0x198], %g2 - ldx [%sp + 2047 + 0x1a0], %g3 - ldx [%sp + 2047 + 0x1a8], %g4 - ldx [%sp + 2047 + 0x1b0], %g5 - ldx [%sp + 2047 + 0x1b8], %g6 - ldx [%sp + 2047 + 0x1c0], %g7 - wrpr %g0, 0x814, %pstate ! save PROM interrupt globals - stx %g1, [%sp + 2047 + 0x0e8] - stx %g2, [%sp + 2047 + 0x0f0] - stx %g3, [%sp + 2047 + 0x0f8] - stx %g4, [%sp + 2047 + 0x100] - stx %g5, [%sp + 2047 + 0x108] - stx %g6, [%sp + 2047 + 0x110] - stx %g7, [%sp + 2047 + 0x118] - ! restore Linux interrupt globals - ldx [%sp + 2047 + 0x1c8], %g1 - ldx [%sp + 2047 + 0x1d0], %g2 - ldx [%sp + 2047 + 0x1d8], %g3 - ldx [%sp + 2047 + 0x1e0], %g4 - ldx [%sp + 2047 + 0x1e8], %g5 - ldx [%sp + 2047 + 0x1f0], %g6 - ldx [%sp + 2047 + 0x1f8], %g7 - wrpr %g0, 0x14, %pstate ! save PROM normal globals - stx %g1, [%sp + 2047 + 0x120] - stx %g2, [%sp + 2047 + 0x128] - stx %g3, [%sp + 2047 + 0x130] - stx %g4, [%sp + 2047 + 0x138] - stx %g5, [%sp + 2047 + 0x140] - stx %g6, [%sp + 2047 + 0x148] - stx %g7, [%sp + 2047 + 0x150] - ! restore Linux normal globals - ldx [%sp + 2047 + 0x200], %g1 - ldx [%sp + 2047 + 0x208], %g2 - ldx [%sp + 2047 + 0x210], %g3 - ldx [%sp + 2047 + 0x218], %g4 - ldx [%sp + 2047 + 0x220], %g5 - ldx [%sp + 2047 + 0x228], %g6 - ldx [%sp + 2047 + 0x230], %g7 - wrpr %g0, 0x414, %pstate ! save PROM mmu globals - stx %g1, [%sp + 2047 + 0x158] - stx %g2, [%sp + 2047 + 0x160] - stx %g3, [%sp + 2047 + 0x168] - stx %g4, [%sp + 2047 + 0x170] - stx %g5, [%sp + 2047 + 0x178] - stx %g6, [%sp + 2047 + 0x180] - stx %g7, [%sp + 2047 + 0x188] - ! restore Linux mmu globals - ldx [%sp + 2047 + 0x238], %o0 - ldx [%sp + 2047 + 0x240], %o1 - ldx [%sp + 2047 + 0x248], %l2 - ldx [%sp + 2047 + 0x250], %l3 - ldx [%sp + 2047 + 0x258], %l5 - ldx [%sp + 2047 + 0x260], %l6 - ldx [%sp + 2047 + 0x268], %l7 - ! switch to Linux tba - sethi %hi(sparc64_ttable_tl0), %l1 - rdpr %tba, %l0 ! save PROM tba - mov %o0, %g1 - mov %o1, %g2 - mov %l2, %g3 - mov %l3, %g4 - mov %l5, %g5 - mov %l6, %g6 - mov %l7, %g7 - wrpr %l1, %tba ! install Linux tba - wrpr %l4, 0, %pstate ! restore PSTATE + save %sp, -192, %sp + TRAP_LOAD_THREAD_REG(%g6, %g1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %o0) + ldx [%g6 + TI_TASK], %g4 call prom_world - mov %g0, %o0 + mov 0, %o0 ldx [%i1 + 0x000], %l2 call %l2 mov %i0, %o0 mov %o0, %l1 call prom_world - or %g0, 1, %o0 - wrpr %g0, 0x14, %pstate ! interrupts off - ! restore PROM mmu globals - ldx [%sp + 2047 + 0x158], %o0 - ldx [%sp + 2047 + 0x160], %o1 - ldx [%sp + 2047 + 0x168], %l2 - ldx [%sp + 2047 + 0x170], %l3 - ldx [%sp + 2047 + 0x178], %l5 - ldx [%sp + 2047 + 0x180], %l6 - ldx [%sp + 2047 + 0x188], %l7 - wrpr %g0, 0x414, %pstate ! restore PROM mmu globals - mov %o0, %g1 - mov %o1, %g2 - mov %l2, %g3 - mov %l3, %g4 - mov %l5, %g5 - mov %l6, %g6 - mov %l7, %g7 - wrpr %l0, %tba ! restore PROM tba - wrpr %g0, 0x14, %pstate ! restore PROM normal globals - ldx [%sp + 2047 + 0x120], %g1 - ldx [%sp + 2047 + 0x128], %g2 - ldx [%sp + 2047 + 0x130], %g3 - ldx [%sp + 2047 + 0x138], %g4 - ldx [%sp + 2047 + 0x140], %g5 - ldx [%sp + 2047 + 0x148], %g6 - ldx [%sp + 2047 + 0x150], %g7 - wrpr %g0, 0x814, %pstate ! restore PROM interrupt globals - ldx [%sp + 2047 + 0x0e8], %g1 - ldx [%sp + 2047 + 0x0f0], %g2 - ldx [%sp + 2047 + 0x0f8], %g3 - ldx [%sp + 2047 + 0x100], %g4 - ldx [%sp + 2047 + 0x108], %g5 - ldx [%sp + 2047 + 0x110], %g6 - ldx [%sp + 2047 + 0x118], %g7 - wrpr %g0, 0x15, %pstate ! restore PROM alternate globals - ldx [%sp + 2047 + 0x0b0], %g1 - ldx [%sp + 2047 + 0x0b8], %g2 - ldx [%sp + 2047 + 0x0c0], %g3 - ldx [%sp + 2047 + 0x0c8], %g4 - ldx [%sp + 2047 + 0x0d0], %g5 - ldx [%sp + 2047 + 0x0d8], %g6 - ldx [%sp + 2047 + 0x0e0], %g7 - wrpr %l4, 0, %pstate + mov 1, %o0 ret restore %l1, 0, %o0 diff --git a/arch/sparc64/prom/console.c b/arch/sparc64/prom/console.c index ac6d035..7c25c54 100644 --- a/arch/sparc64/prom/console.c +++ b/arch/sparc64/prom/console.c @@ -102,6 +102,9 @@ prom_query_input_device(void) if (!strncmp (propb, "rsc", 3)) return PROMDEV_IRSC; + if (!strncmp (propb, "virtual-console", 3)) + return PROMDEV_IVCONS; + if (strncmp (propb, "tty", 3) || !propb[3]) return PROMDEV_I_UNK; @@ -143,6 +146,9 @@ prom_query_output_device(void) if (!strncmp (propb, "rsc", 3)) return PROMDEV_ORSC; + if (!strncmp (propb, "virtual-console", 3)) + return PROMDEV_OVCONS; + if (strncmp (propb, "tty", 3) || !propb[3]) return PROMDEV_O_UNK; diff --git a/arch/sparc64/prom/init.c b/arch/sparc64/prom/init.c index f3cc2d8..1c0db84 100644 --- a/arch/sparc64/prom/init.c +++ b/arch/sparc64/prom/init.c @@ -14,11 +14,10 @@ #include <asm/openprom.h> #include <asm/oplib.h> -enum prom_major_version prom_vers; -unsigned int prom_rev, prom_prev; +/* OBP version string. */ +char prom_version[80]; /* The root node of the prom device tree. */ -int prom_root_node; int prom_stdin, prom_stdout; int prom_chosen_node; @@ -31,68 +30,25 @@ extern void prom_cif_init(void *, void *); void __init prom_init(void *cif_handler, void *cif_stack) { - char buffer[80], *p; - int ints[3]; int node; - int i = 0; - int bufadjust; - - prom_vers = PROM_P1275; prom_cif_init(cif_handler, cif_stack); - prom_root_node = prom_getsibling(0); - if((prom_root_node == 0) || (prom_root_node == -1)) - prom_halt(); - prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || prom_chosen_node == -1) prom_halt(); - prom_stdin = prom_getint (prom_chosen_node, "stdin"); - prom_stdout = prom_getint (prom_chosen_node, "stdout"); + prom_stdin = prom_getint(prom_chosen_node, "stdin"); + prom_stdout = prom_getint(prom_chosen_node, "stdout"); node = prom_finddevice("/openprom"); if (!node || node == -1) prom_halt(); - prom_getstring (node, "version", buffer, sizeof (buffer)); - - prom_printf ("\n"); - - if (strncmp (buffer, "OBP ", 4)) - goto strange_version; - - /* - * Version field is expected to be 'OBP xx.yy.zz date...' - * However, Sun can't stick to this format very well, so - * we need to check for 'OBP xx.yy.zz date...' and adjust - * accordingly. -spot - */ - - if (strncmp (buffer, "OBP ", 5)) - bufadjust = 4; - else - bufadjust = 5; - - p = buffer + bufadjust; - while (p && isdigit(*p) && i < 3) { - ints[i++] = simple_strtoul(p, NULL, 0); - if ((p = strchr(p, '.')) != NULL) - p++; - } - if (i != 3) - goto strange_version; - - prom_rev = ints[1]; - prom_prev = (ints[0] << 16) | (ints[1] << 8) | ints[2]; - - printk ("PROMLIB: Sun IEEE Boot Prom %s\n", buffer + bufadjust); + prom_getstring(node, "version", prom_version, sizeof(prom_version)); - /* Initialization successful. */ - return; + prom_printf("\n"); -strange_version: - prom_printf ("Strange OBP version `%s'.\n", buffer); - prom_halt (); + printk("PROMLIB: Sun IEEE Boot Prom '%s'\n", prom_version); + printk("PROMLIB: Root node compatible: %s\n", prom_root_compatible); } diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 87f5cfc..577bde8 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -112,28 +112,20 @@ unsigned char prom_get_idprom(char *idbuf, int num_bytes) return 0xff; } -/* Get the major prom version number. */ -int prom_version(void) -{ - return PROM_P1275; -} - -/* Get the prom plugin-revision. */ -int prom_getrev(void) -{ - return prom_rev; -} - -/* Get the prom firmware print revision. */ -int prom_getprev(void) +/* Install Linux trap table so PROM uses that instead of its own. */ +void prom_set_trap_table(unsigned long tba) { - return prom_prev; + p1275_cmd("SUNW,set-trap-table", + (P1275_ARG(0, P1275_ARG_IN_64B) | + P1275_INOUT(1, 0)), tba); } -/* Install Linux trap table so PROM uses that instead of its own. */ -void prom_set_trap_table(unsigned long tba) +void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa) { - p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba); + p1275_cmd("SUNW,set-trap-table", + (P1275_ARG(0, P1275_ARG_IN_64B) | + P1275_ARG(1, P1275_ARG_IN_64B) | + P1275_INOUT(2, 0)), tba, mmfsa); } int prom_get_mmu_ihandle(void) @@ -303,9 +295,21 @@ int prom_wakeupsystem(void) } #ifdef CONFIG_SMP -void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0) +void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg) +{ + p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, arg); +} + +void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg) +{ + p1275_cmd("SUNW,start-cpu-by-cpuid", P1275_INOUT(3, 0), + cpuid, pc, arg); +} + +void prom_stopcpu_cpuid(int cpuid) { - p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, o0); + p1275_cmd("SUNW,stop-cpu-by-cpuid", P1275_INOUT(1, 0), + cpuid); } void prom_stopself(void) diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c index a5a7c57..2b32c48 100644 --- a/arch/sparc64/prom/p1275.c +++ b/arch/sparc64/prom/p1275.c @@ -30,16 +30,6 @@ extern void prom_world(int); extern void prom_cif_interface(void); extern void prom_cif_callback(void); -static inline unsigned long spitfire_get_primary_context(void) -{ - unsigned long ctx; - - __asm__ __volatile__("ldxa [%1] %2, %0" - : "=r" (ctx) - : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); - return ctx; -} - /* * This provides SMP safety on the p1275buf. prom_callback() drops this lock * to allow recursuve acquisition. @@ -55,7 +45,6 @@ long p1275_cmd(const char *service, long fmt, ...) long attrs, x; p = p1275buf.prom_buffer; - BUG_ON((spitfire_get_primary_context() & CTX_NR_MASK) != 0); spin_lock_irqsave(&prom_entry_lock, flags); diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c index b1ff9e8..49075ab 100644 --- a/arch/sparc64/prom/tree.c +++ b/arch/sparc64/prom/tree.c @@ -51,7 +51,7 @@ prom_getparent(int node) __inline__ int __prom_getsibling(int node) { - return p1275_cmd ("peer", P1275_INOUT(1, 1), node); + return p1275_cmd(prom_peer_name, P1275_INOUT(1, 1), node); } __inline__ int @@ -59,9 +59,12 @@ prom_getsibling(int node) { int sibnode; - if(node == -1) return 0; + if (node == -1) + return 0; sibnode = __prom_getsibling(node); - if(sibnode == -1) return 0; + if (sibnode == -1) + return 0; + return sibnode; } diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c index 3ab4677..5284996 100644 --- a/arch/sparc64/solaris/misc.c +++ b/arch/sparc64/solaris/misc.c @@ -90,7 +90,7 @@ static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 o len = PAGE_ALIGN(len); if(!(flags & MAP_FIXED)) addr = 0; - else if (len > 0xf0000000UL || addr > 0xf0000000UL - len) + else if (len > STACK_TOP32 || addr > STACK_TOP32 - len) goto out_putf; ret_type = flags & _MAP_NEW; flags &= ~_MAP_NEW; @@ -102,7 +102,7 @@ static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 o (unsigned long) prot, (unsigned long) flags, off); up_write(¤t->mm->mmap_sem); if(!ret_type) - retval = ((retval < 0xf0000000) ? 0 : retval); + retval = ((retval < STACK_TOP32) ? 0 : retval); out_putf: if (file) diff --git a/block/as-iosched.c b/block/as-iosched.c index 8da3cf6..296708c 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -182,6 +182,9 @@ struct as_rq { static kmem_cache_t *arq_pool; +static atomic_t ioc_count = ATOMIC_INIT(0); +static struct completion *ioc_gone; + static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); static void as_antic_stop(struct as_data *ad); @@ -193,6 +196,15 @@ static void as_antic_stop(struct as_data *ad); static void free_as_io_context(struct as_io_context *aic) { kfree(aic); + if (atomic_dec_and_test(&ioc_count) && ioc_gone) + complete(ioc_gone); +} + +static void as_trim(struct io_context *ioc) +{ + if (ioc->aic) + free_as_io_context(ioc->aic); + ioc->aic = NULL; } /* Called when the task exits */ @@ -220,6 +232,7 @@ static struct as_io_context *alloc_as_io_context(void) ret->seek_total = 0; ret->seek_samples = 0; ret->seek_mean = 0; + atomic_inc(&ioc_count); } return ret; @@ -1696,11 +1709,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e) /* * sysfs parts below */ -struct as_fs_entry { - struct attribute attr; - ssize_t (*show)(struct as_data *, char *); - ssize_t (*store)(struct as_data *, const char *, size_t); -}; static ssize_t as_var_show(unsigned int var, char *page) @@ -1717,8 +1725,9 @@ as_var_store(unsigned long *var, const char *page, size_t count) return count; } -static ssize_t as_est_show(struct as_data *ad, char *page) +static ssize_t est_time_show(elevator_t *e, char *page) { + struct as_data *ad = e->elevator_data; int pos = 0; pos += sprintf(page+pos, "%lu %% exit probability\n", @@ -1734,21 +1743,23 @@ static ssize_t as_est_show(struct as_data *ad, char *page) } #define SHOW_FUNCTION(__FUNC, __VAR) \ -static ssize_t __FUNC(struct as_data *ad, char *page) \ +static ssize_t __FUNC(elevator_t *e, char *page) \ { \ + struct as_data *ad = e->elevator_data; \ return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ } -SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]); -SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]); -SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire); -SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]); -SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]); +SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]); +SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]); +SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire); +SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]); +SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ -static ssize_t __FUNC(struct as_data *ad, const char *page, size_t count) \ +static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ { \ - int ret = as_var_store(__PTR, (page), count); \ + struct as_data *ad = e->elevator_data; \ + int ret = as_var_store(__PTR, (page), count); \ if (*(__PTR) < (MIN)) \ *(__PTR) = (MIN); \ else if (*(__PTR) > (MAX)) \ @@ -1756,90 +1767,26 @@ static ssize_t __FUNC(struct as_data *ad, const char *page, size_t count) \ *(__PTR) = msecs_to_jiffies(*(__PTR)); \ return ret; \ } -STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); -STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); -STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX); -STORE_FUNCTION(as_read_batchexpire_store, +STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); +STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX); +STORE_FUNCTION(as_read_batch_expire_store, &ad->batch_expire[REQ_SYNC], 0, INT_MAX); -STORE_FUNCTION(as_write_batchexpire_store, +STORE_FUNCTION(as_write_batch_expire_store, &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); #undef STORE_FUNCTION -static struct as_fs_entry as_est_entry = { - .attr = {.name = "est_time", .mode = S_IRUGO }, - .show = as_est_show, -}; -static struct as_fs_entry as_readexpire_entry = { - .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, - .show = as_readexpire_show, - .store = as_readexpire_store, -}; -static struct as_fs_entry as_writeexpire_entry = { - .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, - .show = as_writeexpire_show, - .store = as_writeexpire_store, -}; -static struct as_fs_entry as_anticexpire_entry = { - .attr = {.name = "antic_expire", .mode = S_IRUGO | S_IWUSR }, - .show = as_anticexpire_show, - .store = as_anticexpire_store, -}; -static struct as_fs_entry as_read_batchexpire_entry = { - .attr = {.name = "read_batch_expire", .mode = S_IRUGO | S_IWUSR }, - .show = as_read_batchexpire_show, - .store = as_read_batchexpire_store, -}; -static struct as_fs_entry as_write_batchexpire_entry = { - .attr = {.name = "write_batch_expire", .mode = S_IRUGO | S_IWUSR }, - .show = as_write_batchexpire_show, - .store = as_write_batchexpire_store, -}; - -static struct attribute *default_attrs[] = { - &as_est_entry.attr, - &as_readexpire_entry.attr, - &as_writeexpire_entry.attr, - &as_anticexpire_entry.attr, - &as_read_batchexpire_entry.attr, - &as_write_batchexpire_entry.attr, - NULL, -}; - -#define to_as(atr) container_of((atr), struct as_fs_entry, attr) - -static ssize_t -as_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - elevator_t *e = container_of(kobj, elevator_t, kobj); - struct as_fs_entry *entry = to_as(attr); - - if (!entry->show) - return -EIO; - - return entry->show(e->elevator_data, page); -} - -static ssize_t -as_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - elevator_t *e = container_of(kobj, elevator_t, kobj); - struct as_fs_entry *entry = to_as(attr); - - if (!entry->store) - return -EIO; - - return entry->store(e->elevator_data, page, length); -} - -static struct sysfs_ops as_sysfs_ops = { - .show = as_attr_show, - .store = as_attr_store, -}; - -static struct kobj_type as_ktype = { - .sysfs_ops = &as_sysfs_ops, - .default_attrs = default_attrs, +#define AS_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, as_##name##_show, as_##name##_store) + +static struct elv_fs_entry as_attrs[] = { + __ATTR_RO(est_time), + AS_ATTR(read_expire), + AS_ATTR(write_expire), + AS_ATTR(antic_expire), + AS_ATTR(read_batch_expire), + AS_ATTR(write_batch_expire), + __ATTR_NULL }; static struct elevator_type iosched_as = { @@ -1860,9 +1807,10 @@ static struct elevator_type iosched_as = { .elevator_may_queue_fn = as_may_queue, .elevator_init_fn = as_init_queue, .elevator_exit_fn = as_exit_queue, + .trim = as_trim, }, - .elevator_ktype = &as_ktype, + .elevator_attrs = as_attrs, .elevator_name = "anticipatory", .elevator_owner = THIS_MODULE, }; @@ -1893,7 +1841,13 @@ static int __init as_init(void) static void __exit as_exit(void) { + DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_as); + ioc_gone = &all_gone; + barrier(); + if (atomic_read(&ioc_count)) + complete(ioc_gone); + synchronize_rcu(); kmem_cache_destroy(arq_pool); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c8dbe38..c4a0d5d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -6,21 +6,13 @@ * * Copyright (C) 2003 Jens Axboe <axboe@suse.de> */ -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/blkdev.h> -#include <linux/elevator.h> -#include <linux/bio.h> #include <linux/config.h> #include <linux/module.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/compiler.h> +#include <linux/blkdev.h> +#include <linux/elevator.h> #include <linux/hash.h> #include <linux/rbtree.h> -#include <linux/mempool.h> #include <linux/ioprio.h> -#include <linux/writeback.h> /* * tunables @@ -47,6 +39,8 @@ static int cfq_slice_idle = HZ / 100; */ static const int cfq_max_depth = 2; +static DEFINE_RWLOCK(cfq_exit_lock); + /* * for the hash of cfqq inside the cfqd */ @@ -89,6 +83,9 @@ static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; static kmem_cache_t *cfq_ioc_pool; +static atomic_t ioc_count = ATOMIC_INIT(0); +static struct completion *ioc_gone; + #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) @@ -109,7 +106,6 @@ static kmem_cache_t *cfq_ioc_pool; * Per block device queue structure */ struct cfq_data { - atomic_t ref; request_queue_t *queue; /* @@ -175,6 +171,8 @@ struct cfq_data { unsigned int cfq_slice_async_rq; unsigned int cfq_slice_idle; unsigned int cfq_max_depth; + + struct list_head cic_list; }; /* @@ -288,7 +286,7 @@ CFQ_CRQ_FNS(is_sync); static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); -static void cfq_put_cfqd(struct cfq_data *cfqd); +static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) @@ -1160,8 +1158,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq) if (unlikely(cfqd->active_queue == cfqq)) __cfq_slice_expired(cfqd, cfqq, 0); - cfq_put_cfqd(cfqq->cfqd); - /* * it's on the empty list and still hashed */ @@ -1179,7 +1175,7 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, hlist_for_each_safe(entry, next, hash_list) { struct cfq_queue *__cfqq = list_entry_qhash(entry); - const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio); + const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) return __cfqq; @@ -1198,13 +1194,24 @@ static void cfq_free_io_context(struct cfq_io_context *cic) { struct cfq_io_context *__cic; struct list_head *entry, *next; + int freed = 1; list_for_each_safe(entry, next, &cic->list) { __cic = list_entry(entry, struct cfq_io_context, list); kmem_cache_free(cfq_ioc_pool, __cic); + freed++; } kmem_cache_free(cfq_ioc_pool, cic); + if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) + complete(ioc_gone); +} + +static void cfq_trim(struct io_context *ioc) +{ + ioc->set_ioprio = NULL; + if (ioc->cic) + cfq_free_io_context(ioc->cic); } /* @@ -1212,25 +1219,37 @@ static void cfq_free_io_context(struct cfq_io_context *cic) */ static void cfq_exit_single_io_context(struct cfq_io_context *cic) { - struct cfq_data *cfqd = cic->cfqq->cfqd; - request_queue_t *q = cfqd->queue; + struct cfq_data *cfqd = cic->key; + request_queue_t *q; + + if (!cfqd) + return; + + q = cfqd->queue; WARN_ON(!irqs_disabled()); spin_lock(q->queue_lock); - if (unlikely(cic->cfqq == cfqd->active_queue)) - __cfq_slice_expired(cfqd, cic->cfqq, 0); + if (cic->cfqq[ASYNC]) { + if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) + __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0); + cfq_put_queue(cic->cfqq[ASYNC]); + cic->cfqq[ASYNC] = NULL; + } + + if (cic->cfqq[SYNC]) { + if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) + __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0); + cfq_put_queue(cic->cfqq[SYNC]); + cic->cfqq[SYNC] = NULL; + } - cfq_put_queue(cic->cfqq); - cic->cfqq = NULL; + cic->key = NULL; + list_del_init(&cic->queue_list); spin_unlock(q->queue_lock); } -/* - * Another task may update the task cic list, if it is doing a queue lookup - * on its behalf. cfq_cic_lock excludes such concurrent updates - */ static void cfq_exit_io_context(struct cfq_io_context *cic) { struct cfq_io_context *__cic; @@ -1242,12 +1261,14 @@ static void cfq_exit_io_context(struct cfq_io_context *cic) /* * put the reference this task is holding to the various queues */ + read_lock(&cfq_exit_lock); list_for_each(entry, &cic->list) { __cic = list_entry(entry, struct cfq_io_context, list); cfq_exit_single_io_context(__cic); } cfq_exit_single_io_context(cic); + read_unlock(&cfq_exit_lock); local_irq_restore(flags); } @@ -1258,7 +1279,8 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) if (cic) { INIT_LIST_HEAD(&cic->list); - cic->cfqq = NULL; + cic->cfqq[ASYNC] = NULL; + cic->cfqq[SYNC] = NULL; cic->key = NULL; cic->last_end_request = jiffies; cic->ttime_total = 0; @@ -1266,6 +1288,8 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) cic->ttime_mean = 0; cic->dtor = cfq_free_io_context; cic->exit = cfq_exit_io_context; + INIT_LIST_HEAD(&cic->queue_list); + atomic_inc(&ioc_count); } return cic; @@ -1318,14 +1342,27 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) cfq_clear_cfqq_prio_changed(cfqq); } -static inline void changed_ioprio(struct cfq_queue *cfqq) +static inline void changed_ioprio(struct cfq_io_context *cic) { - if (cfqq) { - struct cfq_data *cfqd = cfqq->cfqd; - + struct cfq_data *cfqd = cic->key; + struct cfq_queue *cfqq; + if (cfqd) { spin_lock(cfqd->queue->queue_lock); - cfq_mark_cfqq_prio_changed(cfqq); - cfq_init_prio_data(cfqq); + cfqq = cic->cfqq[ASYNC]; + if (cfqq) { + struct cfq_queue *new_cfqq; + new_cfqq = cfq_get_queue(cfqd, CFQ_KEY_ASYNC, + cic->ioc->task, GFP_ATOMIC); + if (new_cfqq) { + cic->cfqq[ASYNC] = new_cfqq; + cfq_put_queue(cfqq); + } + } + cfqq = cic->cfqq[SYNC]; + if (cfqq) { + cfq_mark_cfqq_prio_changed(cfqq); + cfq_init_prio_data(cfqq); + } spin_unlock(cfqd->queue->queue_lock); } } @@ -1335,24 +1372,32 @@ static inline void changed_ioprio(struct cfq_queue *cfqq) */ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) { - struct cfq_io_context *cic = ioc->cic; + struct cfq_io_context *cic; + + write_lock(&cfq_exit_lock); + + cic = ioc->cic; - changed_ioprio(cic->cfqq); + changed_ioprio(cic); list_for_each_entry(cic, &cic->list, list) - changed_ioprio(cic->cfqq); + changed_ioprio(cic); + + write_unlock(&cfq_exit_lock); return 0; } static struct cfq_queue * -cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, +cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask) { const int hashval = hash_long(key, CFQ_QHASH_SHIFT); struct cfq_queue *cfqq, *new_cfqq = NULL; + unsigned short ioprio; retry: + ioprio = tsk->ioprio; cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); if (!cfqq) { @@ -1381,7 +1426,6 @@ retry: hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); atomic_set(&cfqq->ref, 0); cfqq->cfqd = cfqd; - atomic_inc(&cfqd->ref); cfqq->service_last = 0; /* * set ->slice_left to allow preemption for a new process @@ -1419,6 +1463,7 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) if (!ioc) return NULL; +restart: if ((cic = ioc->cic) == NULL) { cic = cfq_alloc_io_context(cfqd, gfp_mask); @@ -1429,11 +1474,13 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) * manually increment generic io_context usage count, it * cannot go away since we are already holding one ref to it */ - ioc->cic = cic; - ioc->set_ioprio = cfq_ioc_set_ioprio; cic->ioc = ioc; cic->key = cfqd; - atomic_inc(&cfqd->ref); + read_lock(&cfq_exit_lock); + ioc->set_ioprio = cfq_ioc_set_ioprio; + ioc->cic = cic; + list_add(&cic->queue_list, &cfqd->cic_list); + read_unlock(&cfq_exit_lock); } else { struct cfq_io_context *__cic; @@ -1443,6 +1490,20 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) if (cic->key == cfqd) goto out; + if (unlikely(!cic->key)) { + read_lock(&cfq_exit_lock); + if (list_empty(&cic->list)) + ioc->cic = NULL; + else + ioc->cic = list_entry(cic->list.next, + struct cfq_io_context, + list); + read_unlock(&cfq_exit_lock); + kmem_cache_free(cfq_ioc_pool, cic); + atomic_dec(&ioc_count); + goto restart; + } + /* * cic exists, check if we already are there. linear search * should be ok here, the list will usually not be more than @@ -1457,6 +1518,14 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) cic = __cic; goto out; } + if (unlikely(!__cic->key)) { + read_lock(&cfq_exit_lock); + list_del(&__cic->list); + read_unlock(&cfq_exit_lock); + kmem_cache_free(cfq_ioc_pool, __cic); + atomic_dec(&ioc_count); + goto restart; + } } /* @@ -1469,8 +1538,10 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) __cic->ioc = ioc; __cic->key = cfqd; - atomic_inc(&cfqd->ref); + read_lock(&cfq_exit_lock); list_add(&__cic->list, &cic->list); + list_add(&__cic->queue_list, &cfqd->cic_list); + read_unlock(&cfq_exit_lock); cic = __cic; } @@ -1890,6 +1961,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, struct cfq_queue *cfqq; struct cfq_rq *crq; unsigned long flags; + int is_sync = key != CFQ_KEY_ASYNC; might_sleep_if(gfp_mask & __GFP_WAIT); @@ -1900,14 +1972,14 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, if (!cic) goto queue_fail; - if (!cic->cfqq) { - cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask); + if (!cic->cfqq[is_sync]) { + cfqq = cfq_get_queue(cfqd, key, tsk, gfp_mask); if (!cfqq) goto queue_fail; - cic->cfqq = cfqq; + cic->cfqq[is_sync] = cfqq; } else - cfqq = cic->cfqq; + cfqq = cic->cfqq[is_sync]; cfqq->allocated[rw]++; cfq_clear_cfqq_must_alloc(cfqq); @@ -1924,7 +1996,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, crq->cfq_queue = cfqq; crq->io_context = cic; - if (rw == READ || process_sync(tsk)) + if (is_sync) cfq_mark_crq_is_sync(crq); else cfq_clear_crq_is_sync(crq); @@ -2055,15 +2127,35 @@ static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) blk_sync_queue(cfqd->queue); } -static void cfq_put_cfqd(struct cfq_data *cfqd) +static void cfq_exit_queue(elevator_t *e) { + struct cfq_data *cfqd = e->elevator_data; request_queue_t *q = cfqd->queue; - if (!atomic_dec_and_test(&cfqd->ref)) - return; + cfq_shutdown_timer_wq(cfqd); + write_lock(&cfq_exit_lock); + spin_lock_irq(q->queue_lock); + if (cfqd->active_queue) + __cfq_slice_expired(cfqd, cfqd->active_queue, 0); + while(!list_empty(&cfqd->cic_list)) { + struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, + struct cfq_io_context, + queue_list); + if (cic->cfqq[ASYNC]) { + cfq_put_queue(cic->cfqq[ASYNC]); + cic->cfqq[ASYNC] = NULL; + } + if (cic->cfqq[SYNC]) { + cfq_put_queue(cic->cfqq[SYNC]); + cic->cfqq[SYNC] = NULL; + } + cic->key = NULL; + list_del_init(&cic->queue_list); + } + spin_unlock_irq(q->queue_lock); + write_unlock(&cfq_exit_lock); cfq_shutdown_timer_wq(cfqd); - blk_put_queue(q); mempool_destroy(cfqd->crq_pool); kfree(cfqd->crq_hash); @@ -2071,14 +2163,6 @@ static void cfq_put_cfqd(struct cfq_data *cfqd) kfree(cfqd); } -static void cfq_exit_queue(elevator_t *e) -{ - struct cfq_data *cfqd = e->elevator_data; - - cfq_shutdown_timer_wq(cfqd); - cfq_put_cfqd(cfqd); -} - static int cfq_init_queue(request_queue_t *q, elevator_t *e) { struct cfq_data *cfqd; @@ -2097,6 +2181,7 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&cfqd->cur_rr); INIT_LIST_HEAD(&cfqd->idle_rr); INIT_LIST_HEAD(&cfqd->empty_list); + INIT_LIST_HEAD(&cfqd->cic_list); cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); if (!cfqd->crq_hash) @@ -2118,7 +2203,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) e->elevator_data = cfqd; cfqd->queue = q; - atomic_inc(&q->refcnt); cfqd->max_queued = q->nr_requests / 4; q->nr_batching = cfq_queued; @@ -2133,8 +2217,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); - atomic_set(&cfqd->ref, 1); - cfqd->cfq_queued = cfq_queued; cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; @@ -2193,11 +2275,6 @@ fail: /* * sysfs parts below --> */ -struct cfq_fs_entry { - struct attribute attr; - ssize_t (*show)(struct cfq_data *, char *); - ssize_t (*store)(struct cfq_data *, const char *, size_t); -}; static ssize_t cfq_var_show(unsigned int var, char *page) @@ -2215,8 +2292,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) } #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ +static ssize_t __FUNC(elevator_t *e, char *page) \ { \ + struct cfq_data *cfqd = e->elevator_data; \ unsigned int __data = __VAR; \ if (__CONV) \ __data = jiffies_to_msecs(__data); \ @@ -2226,8 +2304,8 @@ SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); -SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); -SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0); +SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); +SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); @@ -2236,8 +2314,9 @@ SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ +static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ { \ + struct cfq_data *cfqd = e->elevator_data; \ unsigned int __data; \ int ret = cfq_var_store(&__data, (page), count); \ if (__data < (MIN)) \ @@ -2254,8 +2333,8 @@ STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); -STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); -STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); +STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); @@ -2263,112 +2342,22 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); #undef STORE_FUNCTION -static struct cfq_fs_entry cfq_quantum_entry = { - .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_quantum_show, - .store = cfq_quantum_store, -}; -static struct cfq_fs_entry cfq_queued_entry = { - .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_queued_show, - .store = cfq_queued_store, -}; -static struct cfq_fs_entry cfq_fifo_expire_sync_entry = { - .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_fifo_expire_sync_show, - .store = cfq_fifo_expire_sync_store, -}; -static struct cfq_fs_entry cfq_fifo_expire_async_entry = { - .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_fifo_expire_async_show, - .store = cfq_fifo_expire_async_store, -}; -static struct cfq_fs_entry cfq_back_max_entry = { - .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_back_max_show, - .store = cfq_back_max_store, -}; -static struct cfq_fs_entry cfq_back_penalty_entry = { - .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_back_penalty_show, - .store = cfq_back_penalty_store, -}; -static struct cfq_fs_entry cfq_slice_sync_entry = { - .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_slice_sync_show, - .store = cfq_slice_sync_store, -}; -static struct cfq_fs_entry cfq_slice_async_entry = { - .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_slice_async_show, - .store = cfq_slice_async_store, -}; -static struct cfq_fs_entry cfq_slice_async_rq_entry = { - .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_slice_async_rq_show, - .store = cfq_slice_async_rq_store, -}; -static struct cfq_fs_entry cfq_slice_idle_entry = { - .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_slice_idle_show, - .store = cfq_slice_idle_store, -}; -static struct cfq_fs_entry cfq_max_depth_entry = { - .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_max_depth_show, - .store = cfq_max_depth_store, -}; - -static struct attribute *default_attrs[] = { - &cfq_quantum_entry.attr, - &cfq_queued_entry.attr, - &cfq_fifo_expire_sync_entry.attr, - &cfq_fifo_expire_async_entry.attr, - &cfq_back_max_entry.attr, - &cfq_back_penalty_entry.attr, - &cfq_slice_sync_entry.attr, - &cfq_slice_async_entry.attr, - &cfq_slice_async_rq_entry.attr, - &cfq_slice_idle_entry.attr, - &cfq_max_depth_entry.attr, - NULL, -}; - -#define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr) - -static ssize_t -cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - elevator_t *e = container_of(kobj, elevator_t, kobj); - struct cfq_fs_entry *entry = to_cfq(attr); - - if (!entry->show) - return -EIO; - - return entry->show(e->elevator_data, page); -} - -static ssize_t -cfq_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - elevator_t *e = container_of(kobj, elevator_t, kobj); - struct cfq_fs_entry *entry = to_cfq(attr); - - if (!entry->store) - return -EIO; - - return entry->store(e->elevator_data, page, length); -} - -static struct sysfs_ops cfq_sysfs_ops = { - .show = cfq_attr_show, - .store = cfq_attr_store, -}; - -static struct kobj_type cfq_ktype = { - .sysfs_ops = &cfq_sysfs_ops, - .default_attrs = default_attrs, +#define CFQ_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) + +static struct elv_fs_entry cfq_attrs[] = { + CFQ_ATTR(quantum), + CFQ_ATTR(queued), + CFQ_ATTR(fifo_expire_sync), + CFQ_ATTR(fifo_expire_async), + CFQ_ATTR(back_seek_max), + CFQ_ATTR(back_seek_penalty), + CFQ_ATTR(slice_sync), + CFQ_ATTR(slice_async), + CFQ_ATTR(slice_async_rq), + CFQ_ATTR(slice_idle), + CFQ_ATTR(max_depth), + __ATTR_NULL }; static struct elevator_type iosched_cfq = { @@ -2389,8 +2378,9 @@ static struct elevator_type iosched_cfq = { .elevator_may_queue_fn = cfq_may_queue, .elevator_init_fn = cfq_init_queue, .elevator_exit_fn = cfq_exit_queue, + .trim = cfq_trim, }, - .elevator_ktype = &cfq_ktype, + .elevator_attrs = cfq_attrs, .elevator_name = "cfq", .elevator_owner = THIS_MODULE, }; @@ -2419,7 +2409,13 @@ static int __init cfq_init(void) static void __exit cfq_exit(void) { + DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_cfq); + ioc_gone = &all_gone; + barrier(); + if (atomic_read(&ioc_count)) + complete(ioc_gone); + synchronize_rcu(); cfq_slab_kill(); } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 27e494b..399fa1e 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -694,11 +694,6 @@ deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, /* * sysfs parts below */ -struct deadline_fs_entry { - struct attribute attr; - ssize_t (*show)(struct deadline_data *, char *); - ssize_t (*store)(struct deadline_data *, const char *, size_t); -}; static ssize_t deadline_var_show(int var, char *page) @@ -716,23 +711,25 @@ deadline_var_store(int *var, const char *page, size_t count) } #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -static ssize_t __FUNC(struct deadline_data *dd, char *page) \ +static ssize_t __FUNC(elevator_t *e, char *page) \ { \ - int __data = __VAR; \ + struct deadline_data *dd = e->elevator_data; \ + int __data = __VAR; \ if (__CONV) \ __data = jiffies_to_msecs(__data); \ return deadline_var_show(__data, (page)); \ } -SHOW_FUNCTION(deadline_readexpire_show, dd->fifo_expire[READ], 1); -SHOW_FUNCTION(deadline_writeexpire_show, dd->fifo_expire[WRITE], 1); -SHOW_FUNCTION(deadline_writesstarved_show, dd->writes_starved, 0); -SHOW_FUNCTION(deadline_frontmerges_show, dd->front_merges, 0); -SHOW_FUNCTION(deadline_fifobatch_show, dd->fifo_batch, 0); +SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1); +SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1); +SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0); +SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0); +SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) \ +static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ { \ + struct deadline_data *dd = e->elevator_data; \ int __data; \ int ret = deadline_var_store(&__data, (page), count); \ if (__data < (MIN)) \ @@ -745,83 +742,24 @@ static ssize_t __FUNC(struct deadline_data *dd, const char *page, size_t count) *(__PTR) = __data; \ return ret; \ } -STORE_FUNCTION(deadline_readexpire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); -STORE_FUNCTION(deadline_writeexpire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); -STORE_FUNCTION(deadline_writesstarved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); -STORE_FUNCTION(deadline_frontmerges_store, &dd->front_merges, 0, 1, 0); -STORE_FUNCTION(deadline_fifobatch_store, &dd->fifo_batch, 0, INT_MAX, 0); +STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); +STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0); +STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); #undef STORE_FUNCTION -static struct deadline_fs_entry deadline_readexpire_entry = { - .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, - .show = deadline_readexpire_show, - .store = deadline_readexpire_store, -}; -static struct deadline_fs_entry deadline_writeexpire_entry = { - .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR }, - .show = deadline_writeexpire_show, - .store = deadline_writeexpire_store, -}; -static struct deadline_fs_entry deadline_writesstarved_entry = { - .attr = {.name = "writes_starved", .mode = S_IRUGO | S_IWUSR }, - .show = deadline_writesstarved_show, - .store = deadline_writesstarved_store, -}; -static struct deadline_fs_entry deadline_frontmerges_entry = { - .attr = {.name = "front_merges", .mode = S_IRUGO | S_IWUSR }, - .show = deadline_frontmerges_show, - .store = deadline_frontmerges_store, -}; -static struct deadline_fs_entry deadline_fifobatch_entry = { - .attr = {.name = "fifo_batch", .mode = S_IRUGO | S_IWUSR }, - .show = deadline_fifobatch_show, - .store = deadline_fifobatch_store, -}; - -static struct attribute *default_attrs[] = { - &deadline_readexpire_entry.attr, - &deadline_writeexpire_entry.attr, - &deadline_writesstarved_entry.attr, - &deadline_frontmerges_entry.attr, - &deadline_fifobatch_entry.attr, - NULL, -}; - -#define to_deadline(atr) container_of((atr), struct deadline_fs_entry, attr) - -static ssize_t -deadline_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - elevator_t *e = container_of(kobj, elevator_t, kobj); - struct deadline_fs_entry *entry = to_deadline(attr); - - if (!entry->show) - return -EIO; - - return entry->show(e->elevator_data, page); -} - -static ssize_t -deadline_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - elevator_t *e = container_of(kobj, elevator_t, kobj); - struct deadline_fs_entry *entry = to_deadline(attr); - - if (!entry->store) - return -EIO; - - return entry->store(e->elevator_data, page, length); -} - -static struct sysfs_ops deadline_sysfs_ops = { - .show = deadline_attr_show, - .store = deadline_attr_store, -}; - -static struct kobj_type deadline_ktype = { - .sysfs_ops = &deadline_sysfs_ops, - .default_attrs = default_attrs, +#define DD_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \ + deadline_##name##_store) + +static struct elv_fs_entry deadline_attrs[] = { + DD_ATTR(read_expire), + DD_ATTR(write_expire), + DD_ATTR(writes_starved), + DD_ATTR(front_merges), + DD_ATTR(fifo_batch), + __ATTR_NULL }; static struct elevator_type iosched_deadline = { @@ -840,7 +778,7 @@ static struct elevator_type iosched_deadline = { .elevator_exit_fn = deadline_exit_queue, }, - .elevator_ktype = &deadline_ktype, + .elevator_attrs = deadline_attrs, .elevator_name = "deadline", .elevator_owner = THIS_MODULE, }; diff --git a/block/elevator.c b/block/elevator.c index 24b702d..db3d0d8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -120,15 +120,10 @@ static struct elevator_type *elevator_get(const char *name) return e; } -static int elevator_attach(request_queue_t *q, struct elevator_type *e, - struct elevator_queue *eq) +static int elevator_attach(request_queue_t *q, struct elevator_queue *eq) { int ret = 0; - memset(eq, 0, sizeof(*eq)); - eq->ops = &e->ops; - eq->elevator_type = e; - q->elevator = eq; if (eq->ops->elevator_init_fn) @@ -154,6 +149,32 @@ static int __init elevator_setup(char *str) __setup("elevator=", elevator_setup); +static struct kobj_type elv_ktype; + +static elevator_t *elevator_alloc(struct elevator_type *e) +{ + elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); + if (eq) { + memset(eq, 0, sizeof(*eq)); + eq->ops = &e->ops; + eq->elevator_type = e; + kobject_init(&eq->kobj); + snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); + eq->kobj.ktype = &elv_ktype; + mutex_init(&eq->sysfs_lock); + } else { + elevator_put(e); + } + return eq; +} + +static void elevator_release(struct kobject *kobj) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + elevator_put(e->elevator_type); + kfree(e); +} + int elevator_init(request_queue_t *q, char *name) { struct elevator_type *e = NULL; @@ -176,29 +197,26 @@ int elevator_init(request_queue_t *q, char *name) e = elevator_get("noop"); } - eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL); - if (!eq) { - elevator_put(e); + eq = elevator_alloc(e); + if (!eq) return -ENOMEM; - } - ret = elevator_attach(q, e, eq); - if (ret) { - kfree(eq); - elevator_put(e); - } + ret = elevator_attach(q, eq); + if (ret) + kobject_put(&eq->kobj); return ret; } void elevator_exit(elevator_t *e) { + mutex_lock(&e->sysfs_lock); if (e->ops->elevator_exit_fn) e->ops->elevator_exit_fn(e); + e->ops = NULL; + mutex_unlock(&e->sysfs_lock); - elevator_put(e->elevator_type); - e->elevator_type = NULL; - kfree(e); + kobject_put(&e->kobj); } /* @@ -627,26 +645,79 @@ void elv_completed_request(request_queue_t *q, struct request *rq) } } -int elv_register_queue(struct request_queue *q) +#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) + +static ssize_t +elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { - elevator_t *e = q->elevator; + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct elv_fs_entry *entry = to_elv(attr); + ssize_t error; - e->kobj.parent = kobject_get(&q->kobj); - if (!e->kobj.parent) - return -EBUSY; + if (!entry->show) + return -EIO; - snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - e->kobj.ktype = e->elevator_type->elevator_ktype; + mutex_lock(&e->sysfs_lock); + error = e->ops ? entry->show(e, page) : -ENOENT; + mutex_unlock(&e->sysfs_lock); + return error; +} + +static ssize_t +elv_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct elv_fs_entry *entry = to_elv(attr); + ssize_t error; + + if (!entry->store) + return -EIO; + + mutex_lock(&e->sysfs_lock); + error = e->ops ? entry->store(e, page, length) : -ENOENT; + mutex_unlock(&e->sysfs_lock); + return error; +} + +static struct sysfs_ops elv_sysfs_ops = { + .show = elv_attr_show, + .store = elv_attr_store, +}; + +static struct kobj_type elv_ktype = { + .sysfs_ops = &elv_sysfs_ops, + .release = elevator_release, +}; - return kobject_register(&e->kobj); +int elv_register_queue(struct request_queue *q) +{ + elevator_t *e = q->elevator; + int error; + + e->kobj.parent = &q->kobj; + + error = kobject_add(&e->kobj); + if (!error) { + struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; + if (attr) { + while (attr->attr.name) { + if (sysfs_create_file(&e->kobj, &attr->attr)) + break; + attr++; + } + } + kobject_uevent(&e->kobj, KOBJ_ADD); + } + return error; } void elv_unregister_queue(struct request_queue *q) { if (q) { elevator_t *e = q->elevator; - kobject_unregister(&e->kobj); - kobject_put(&q->kobj); + kobject_uevent(&e->kobj, KOBJ_REMOVE); + kobject_del(&e->kobj); } } @@ -675,21 +746,15 @@ void elv_unregister(struct elevator_type *e) /* * Iterate every thread in the process to remove the io contexts. */ - read_lock(&tasklist_lock); - do_each_thread(g, p) { - struct io_context *ioc = p->io_context; - if (ioc && ioc->cic) { - ioc->cic->exit(ioc->cic); - ioc->cic->dtor(ioc->cic); - ioc->cic = NULL; - } - if (ioc && ioc->aic) { - ioc->aic->exit(ioc->aic); - ioc->aic->dtor(ioc->aic); - ioc->aic = NULL; - } - } while_each_thread(g, p); - read_unlock(&tasklist_lock); + if (e->ops.trim) { + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + e->ops.trim(p->io_context); + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + } spin_lock_irq(&elv_list_lock); list_del_init(&e->list); @@ -703,16 +768,16 @@ EXPORT_SYMBOL_GPL(elv_unregister); * need for the new one. this way we have a chance of going back to the old * one, if the new one fails init for some reason. */ -static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) +static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) { elevator_t *old_elevator, *e; /* * Allocate new elevator */ - e = kmalloc(sizeof(elevator_t), GFP_KERNEL); + e = elevator_alloc(new_e); if (!e) - goto error; + return 0; /* * Turn on BYPASS and drain all requests w/ elevator private data @@ -743,7 +808,7 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) /* * attach and start new elevator */ - if (elevator_attach(q, new_e, e)) + if (elevator_attach(q, e)) goto fail; if (elv_register_queue(q)) @@ -754,7 +819,7 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) */ elevator_exit(old_elevator); clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); - return; + return 1; fail_register: /* @@ -767,10 +832,9 @@ fail: q->elevator = old_elevator; elv_register_queue(q); clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); - kfree(e); -error: - elevator_put(new_e); - printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); + if (e) + kobject_put(&e->kobj); + return 0; } ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) @@ -797,7 +861,8 @@ ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) return count; } - elevator_switch(q, e); + if (!elevator_switch(q, e)) + printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); return count; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 0ef2971..6c793b1 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1740,16 +1740,11 @@ EXPORT_SYMBOL(blk_run_queue); * Hopefully the low level driver will have finished any * outstanding requests first... **/ -void blk_cleanup_queue(request_queue_t * q) +static void blk_release_queue(struct kobject *kobj) { + request_queue_t *q = container_of(kobj, struct request_queue, kobj); struct request_list *rl = &q->rq; - if (!atomic_dec_and_test(&q->refcnt)) - return; - - if (q->elevator) - elevator_exit(q->elevator); - blk_sync_queue(q); if (rl->rq_pool) @@ -1761,6 +1756,24 @@ void blk_cleanup_queue(request_queue_t * q) kmem_cache_free(requestq_cachep, q); } +void blk_put_queue(request_queue_t *q) +{ + kobject_put(&q->kobj); +} +EXPORT_SYMBOL(blk_put_queue); + +void blk_cleanup_queue(request_queue_t * q) +{ + mutex_lock(&q->sysfs_lock); + set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + mutex_unlock(&q->sysfs_lock); + + if (q->elevator) + elevator_exit(q->elevator); + + blk_put_queue(q); +} + EXPORT_SYMBOL(blk_cleanup_queue); static int blk_init_free_list(request_queue_t *q) @@ -1788,6 +1801,8 @@ request_queue_t *blk_alloc_queue(gfp_t gfp_mask) } EXPORT_SYMBOL(blk_alloc_queue); +static struct kobj_type queue_ktype; + request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { request_queue_t *q; @@ -1798,11 +1813,16 @@ request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) memset(q, 0, sizeof(*q)); init_timer(&q->unplug_timer); - atomic_set(&q->refcnt, 1); + + snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); + q->kobj.ktype = &queue_ktype; + kobject_init(&q->kobj); q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; q->backing_dev_info.unplug_io_data = q; + mutex_init(&q->sysfs_lock); + return q; } EXPORT_SYMBOL(blk_alloc_queue_node); @@ -1854,8 +1874,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return NULL; q->node = node_id; - if (blk_init_free_list(q)) - goto out_init; + if (blk_init_free_list(q)) { + kmem_cache_free(requestq_cachep, q); + return NULL; + } /* * if caller didn't supply a lock, they get per-queue locking with @@ -1891,9 +1913,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return q; } - blk_cleanup_queue(q); -out_init: - kmem_cache_free(requestq_cachep, q); + blk_put_queue(q); return NULL; } EXPORT_SYMBOL(blk_init_queue_node); @@ -1901,7 +1921,7 @@ EXPORT_SYMBOL(blk_init_queue_node); int blk_get_queue(request_queue_t *q) { if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - atomic_inc(&q->refcnt); + kobject_get(&q->kobj); return 0; } @@ -3477,10 +3497,12 @@ void put_io_context(struct io_context *ioc) BUG_ON(atomic_read(&ioc->refcount) == 0); if (atomic_dec_and_test(&ioc->refcount)) { + rcu_read_lock(); if (ioc->aic && ioc->aic->dtor) ioc->aic->dtor(ioc->aic); if (ioc->cic && ioc->cic->dtor) ioc->cic->dtor(ioc->cic); + rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); } @@ -3614,10 +3636,13 @@ static ssize_t queue_requests_store(struct request_queue *q, const char *page, size_t count) { struct request_list *rl = &q->rq; + unsigned long nr; + int ret = queue_var_store(&nr, page, count); + if (nr < BLKDEV_MIN_RQ) + nr = BLKDEV_MIN_RQ; - int ret = queue_var_store(&q->nr_requests, page, count); - if (q->nr_requests < BLKDEV_MIN_RQ) - q->nr_requests = BLKDEV_MIN_RQ; + spin_lock_irq(q->queue_lock); + q->nr_requests = nr; blk_queue_congestion_threshold(q); if (rl->count[READ] >= queue_congestion_on_threshold(q)) @@ -3643,6 +3668,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) blk_clear_queue_full(q, WRITE); wake_up(&rl->wait[WRITE]); } + spin_unlock_irq(q->queue_lock); return ret; } @@ -3758,13 +3784,19 @@ static ssize_t queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct queue_sysfs_entry *entry = to_queue(attr); - struct request_queue *q; + request_queue_t *q = container_of(kobj, struct request_queue, kobj); + ssize_t res; - q = container_of(kobj, struct request_queue, kobj); if (!entry->show) return -EIO; - - return entry->show(q, page); + mutex_lock(&q->sysfs_lock); + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { + mutex_unlock(&q->sysfs_lock); + return -ENOENT; + } + res = entry->show(q, page); + mutex_unlock(&q->sysfs_lock); + return res; } static ssize_t @@ -3772,13 +3804,20 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t length) { struct queue_sysfs_entry *entry = to_queue(attr); - struct request_queue *q; + request_queue_t *q = container_of(kobj, struct request_queue, kobj); + + ssize_t res; - q = container_of(kobj, struct request_queue, kobj); if (!entry->store) return -EIO; - - return entry->store(q, page, length); + mutex_lock(&q->sysfs_lock); + if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { + mutex_unlock(&q->sysfs_lock); + return -ENOENT; + } + res = entry->store(q, page, length); + mutex_unlock(&q->sysfs_lock); + return res; } static struct sysfs_ops queue_sysfs_ops = { @@ -3789,6 +3828,7 @@ static struct sysfs_ops queue_sysfs_ops = { static struct kobj_type queue_ktype = { .sysfs_ops = &queue_sysfs_ops, .default_attrs = default_attrs, + .release = blk_release_queue, }; int blk_register_queue(struct gendisk *disk) @@ -3801,19 +3841,17 @@ int blk_register_queue(struct gendisk *disk) return -ENXIO; q->kobj.parent = kobject_get(&disk->kobj); - if (!q->kobj.parent) - return -EBUSY; - snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); - q->kobj.ktype = &queue_ktype; - - ret = kobject_register(&q->kobj); + ret = kobject_add(&q->kobj); if (ret < 0) return ret; + kobject_uevent(&q->kobj, KOBJ_ADD); + ret = elv_register_queue(q); if (ret) { - kobject_unregister(&q->kobj); + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); return ret; } @@ -3827,7 +3865,8 @@ void blk_unregister_queue(struct gendisk *disk) if (q && q->request_fn) { elv_unregister_queue(q); - kobject_unregister(&q->kobj); + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); kobject_put(&disk->kobj); } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5f6d1a5..0010704 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1307,7 +1307,7 @@ static int __init loop_init(void) out_mem4: while (i--) - blk_put_queue(loop_dev[i].lo_queue); + blk_cleanup_queue(loop_dev[i].lo_queue); devfs_remove("loop"); i = max_loop; out_mem3: @@ -1328,7 +1328,7 @@ static void loop_exit(void) for (i = 0; i < max_loop; i++) { del_gendisk(disks[i]); - blk_put_queue(loop_dev[i].lo_queue); + blk_cleanup_queue(loop_dev[i].lo_queue); put_disk(disks[i]); } devfs_remove("loop"); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index bc9b2bc..476a5b5 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2514,7 +2514,7 @@ static int pkt_setup_dev(struct pkt_ctrl_command *ctrl_cmd) return 0; out_new_dev: - blk_put_queue(disk->queue); + blk_cleanup_queue(disk->queue); out_mem2: put_disk(disk); out_mem: @@ -2555,7 +2555,7 @@ static int pkt_remove_dev(struct pkt_ctrl_command *ctrl_cmd) DPRINTK("pktcdvd: writer %s unmapped\n", pd->name); del_gendisk(pd->disk); - blk_put_queue(pd->disk->queue); + blk_cleanup_queue(pd->disk->queue); put_disk(pd->disk); pkt_devs[idx] = NULL; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 4ada126..c16e66b 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -1131,7 +1131,7 @@ static void mm_pci_remove(struct pci_dev *dev) pci_free_consistent(card->dev, PAGE_SIZE*2, card->mm_pages[1].desc, card->mm_pages[1].page_dma); - blk_put_queue(card->queue); + blk_cleanup_queue(card->queue); } static const struct pci_device_id mm_pci_ids[] = { { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 745ca1f..88d6020 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -840,7 +840,7 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) bad3: mempool_destroy(md->io_pool); bad2: - blk_put_queue(md->queue); + blk_cleanup_queue(md->queue); free_minor(minor); bad1: kfree(md); @@ -860,7 +860,7 @@ static void free_dev(struct mapped_device *md) del_gendisk(md->disk); free_minor(minor); put_disk(md->disk); - blk_put_queue(md->queue); + blk_cleanup_queue(md->queue); kfree(md); } diff --git a/drivers/md/md.c b/drivers/md/md.c index d05e312..5ed2228 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -213,8 +213,11 @@ static void mddev_put(mddev_t *mddev) return; if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); - blk_put_queue(mddev->queue); + /* that blocks */ + blk_cleanup_queue(mddev->queue); + /* that also blocks */ kobject_unregister(&mddev->kobj); + /* result blows... */ } spin_unlock(&all_mddevs_lock); } diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 830528d..dc845f3 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -100,6 +100,10 @@ static int max_interrupt_work = 10; static char versionA[] __initdata = DRV_NAME ".c:" DRV_VERSION " " DRV_RELDATE " becker@scyld.com\n"; static char versionB[] __initdata = "http://www.scyld.com/network/3c509.html\n"; +#if defined(CONFIG_PM) && (defined(CONFIG_MCA) || defined(CONFIG_EISA)) +#define EL3_SUSPEND +#endif + #ifdef EL3_DEBUG static int el3_debug = EL3_DEBUG; #else @@ -174,9 +178,6 @@ struct el3_private { /* skb send-queue */ int head, size; struct sk_buff *queue[SKB_QUEUE_SIZE]; -#ifdef CONFIG_PM_LEGACY - struct pm_dev *pmdev; -#endif enum { EL3_MCA, EL3_PNP, @@ -201,11 +202,15 @@ static void el3_tx_timeout (struct net_device *dev); static void el3_down(struct net_device *dev); static void el3_up(struct net_device *dev); static struct ethtool_ops ethtool_ops; -#ifdef CONFIG_PM_LEGACY -static int el3_suspend(struct pm_dev *pdev); -static int el3_resume(struct pm_dev *pdev); -static int el3_pm_callback(struct pm_dev *pdev, pm_request_t rqst, void *data); +#ifdef EL3_SUSPEND +static int el3_suspend(struct device *, pm_message_t); +static int el3_resume(struct device *); +#else +#define el3_suspend NULL +#define el3_resume NULL #endif + + /* generic device remove for all device types */ #if defined(CONFIG_EISA) || defined(CONFIG_MCA) static int el3_device_remove (struct device *device); @@ -229,7 +234,9 @@ static struct eisa_driver el3_eisa_driver = { .driver = { .name = "3c509", .probe = el3_eisa_probe, - .remove = __devexit_p (el3_device_remove) + .remove = __devexit_p (el3_device_remove), + .suspend = el3_suspend, + .resume = el3_resume, } }; #endif @@ -262,6 +269,8 @@ static struct mca_driver el3_mca_driver = { .bus = &mca_bus_type, .probe = el3_mca_probe, .remove = __devexit_p(el3_device_remove), + .suspend = el3_suspend, + .resume = el3_resume, }, }; #endif /* CONFIG_MCA */ @@ -362,10 +371,6 @@ static void el3_common_remove (struct net_device *dev) struct el3_private *lp = netdev_priv(dev); (void) lp; /* Keep gcc quiet... */ -#ifdef CONFIG_PM_LEGACY - if (lp->pmdev) - pm_unregister(lp->pmdev); -#endif #if defined(__ISAPNP__) if (lp->type == EL3_PNP) pnp_device_detach(to_pnp_dev(lp->dev)); @@ -572,16 +577,6 @@ no_pnp: if (err) goto out1; -#ifdef CONFIG_PM_LEGACY - /* register power management */ - lp->pmdev = pm_register(PM_ISA_DEV, card_idx, el3_pm_callback); - if (lp->pmdev) { - struct pm_dev *p; - p = lp->pmdev; - p->data = (struct net_device *)dev; - } -#endif - el3_cards++; lp->next_dev = el3_root_dev; el3_root_dev = dev; @@ -1480,20 +1475,17 @@ el3_up(struct net_device *dev) } /* Power Management support functions */ -#ifdef CONFIG_PM_LEGACY +#ifdef EL3_SUSPEND static int -el3_suspend(struct pm_dev *pdev) +el3_suspend(struct device *pdev, pm_message_t state) { unsigned long flags; struct net_device *dev; struct el3_private *lp; int ioaddr; - if (!pdev && !pdev->data) - return -EINVAL; - - dev = (struct net_device *)pdev->data; + dev = pdev->driver_data; lp = netdev_priv(dev); ioaddr = dev->base_addr; @@ -1510,17 +1502,14 @@ el3_suspend(struct pm_dev *pdev) } static int -el3_resume(struct pm_dev *pdev) +el3_resume(struct device *pdev) { unsigned long flags; struct net_device *dev; struct el3_private *lp; int ioaddr; - if (!pdev && !pdev->data) - return -EINVAL; - - dev = (struct net_device *)pdev->data; + dev = pdev->driver_data; lp = netdev_priv(dev); ioaddr = dev->base_addr; @@ -1536,20 +1525,7 @@ el3_resume(struct pm_dev *pdev) return 0; } -static int -el3_pm_callback(struct pm_dev *pdev, pm_request_t rqst, void *data) -{ - switch (rqst) { - case PM_SUSPEND: - return el3_suspend(pdev); - - case PM_RESUME: - return el3_resume(pdev); - } - return 0; -} - -#endif /* CONFIG_PM_LEGACY */ +#endif /* EL3_SUSPEND */ /* Parameters that may be passed into the module. */ static int debug = -1; diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c index 9e1fe2e..b40885d 100644 --- a/drivers/net/3c523.c +++ b/drivers/net/3c523.c @@ -105,6 +105,7 @@ #include <linux/mca-legacy.h> #include <linux/ethtool.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -658,7 +659,7 @@ static int init586(struct net_device *dev) s = jiffies; /* warning: only active with interrupts on !! */ while (!(cfg_cmd->cmd_status & STAT_COMPL)) { - if (jiffies - s > 30*HZ/100) + if (time_after(jiffies, s + 30*HZ/100)) break; } @@ -684,7 +685,7 @@ static int init586(struct net_device *dev) s = jiffies; while (!(ias_cmd->cmd_status & STAT_COMPL)) { - if (jiffies - s > 30*HZ/100) + if (time_after(jiffies, s + 30*HZ/100)) break; } @@ -709,7 +710,7 @@ static int init586(struct net_device *dev) s = jiffies; while (!(tdr_cmd->cmd_status & STAT_COMPL)) { - if (jiffies - s > 30*HZ/100) { + if (time_after(jiffies, s + 30*HZ/100)) { printk(KERN_WARNING "%s: %d Problems while running the TDR.\n", dev->name, __LINE__); result = 1; break; @@ -798,7 +799,7 @@ static int init586(struct net_device *dev) elmc_id_attn586(); s = jiffies; while (!(mc_cmd->cmd_status & STAT_COMPL)) { - if (jiffies - s > 30*HZ/100) + if (time_after(jiffies, s + 30*HZ/100)) break; } if (!(mc_cmd->cmd_status & STAT_COMPL)) { diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 7f47124..5d11a06 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -258,6 +258,7 @@ static int vortex_debug = 1; #include <linux/highmem.h> #include <linux/eisa.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/irq.h> /* For NR_IRQS only. */ #include <asm/io.h> #include <asm/uaccess.h> @@ -841,7 +842,7 @@ enum xcvr_types { XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10, }; -static struct media_table { +static const struct media_table { char *name; unsigned int media_bits:16, /* Bits to set in Wn4_Media register. */ mask:8, /* The transceiver-present bit in Wn3_Config.*/ @@ -1445,7 +1446,7 @@ static int __devinit vortex_probe1(struct device *gendev, } { - static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; + static const char * const ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; unsigned int config; EL3WINDOW(3); vp->available_media = ioread16(ioaddr + Wn3_Options); @@ -2724,7 +2725,7 @@ boomerang_rx(struct net_device *dev) skb = dev_alloc_skb(PKT_BUF_SZ); if (skb == NULL) { static unsigned long last_jif; - if ((jiffies - last_jif) > 10 * HZ) { + if (time_after(jiffies, last_jif + 10 * HZ)) { printk(KERN_WARNING "%s: memory shortage\n", dev->name); last_jif = jiffies; } diff --git a/drivers/net/7990.c b/drivers/net/7990.c index 18b027e..86633c5 100644 --- a/drivers/net/7990.c +++ b/drivers/net/7990.c @@ -29,7 +29,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/skbuff.h> -#include <linux/irq.h> +#include <asm/irq.h> /* Used for the temporal inet entries and routing */ #include <linux/socket.h> #include <linux/bitops.h> diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index dd41049..ce99845 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -1276,7 +1276,7 @@ static int cp_change_mtu(struct net_device *dev, int new_mtu) } #endif /* BROKEN */ -static char mii_2_8139_map[8] = { +static const char mii_2_8139_map[8] = { BasicModeCtrl, BasicModeStatus, 0, diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index 2beac55..e58d4c50 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -229,7 +229,7 @@ typedef enum { /* indexed by board_t, above */ -static struct { +static const struct { const char *name; u32 hw_flags; } board_info[] __devinitdata = { @@ -1192,7 +1192,7 @@ static int __devinit read_eeprom (void __iomem *ioaddr, int location, int addr_l #define mdio_delay() RTL_R8(Config4) -static char mii_2_8139_map[8] = { +static const char mii_2_8139_map[8] = { BasicModeCtrl, BasicModeStatus, 0, diff --git a/drivers/net/82596.c b/drivers/net/82596.c index 13b745b..da0c878 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -614,7 +614,7 @@ static void rebuild_rx_bufs(struct net_device *dev) static int init_i596_mem(struct net_device *dev) { struct i596_private *lp = dev->priv; -#if !defined(ENABLE_MVME16x_NET) && !defined(ENABLE_BVME6000_NET) +#if !defined(ENABLE_MVME16x_NET) && !defined(ENABLE_BVME6000_NET) || defined(ENABLE_APRICOT) short ioaddr = dev->base_addr; #endif unsigned long flags; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index aa633fa..e0b1109 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -66,7 +66,7 @@ config BONDING 'Trunking' by Sun, 802.3ad by the IEEE, and 'Bonding' in Linux. The driver supports multiple bonding modes to allow for both high - perfomance and high availability operation. + performance and high availability operation. Refer to <file:Documentation/networking/bonding.txt> for more information. @@ -698,8 +698,8 @@ config VORTEX depends on NET_VENDOR_3COM && (PCI || EISA) select MII ---help--- - This option enables driver support for a large number of 10mbps and - 10/100mbps EISA, PCI and PCMCIA 3Com network cards: + This option enables driver support for a large number of 10Mbps and + 10/100Mbps EISA, PCI and PCMCIA 3Com network cards: "Vortex" (Fast EtherLink 3c590/3c592/3c595/3c597) EISA and PCI "Boomerang" (EtherLink XL 3c900 or 3c905) PCI @@ -1021,7 +1021,7 @@ config EEXPRESS_PRO depends on NET_ISA ---help--- If you have a network (Ethernet) card of this type, say Y. This - driver supports intel i82595{FX,TX} based boards. Note however + driver supports Intel i82595{FX,TX} based boards. Note however that the EtherExpress PRO/100 Ethernet card has its own separate driver. Please read the Ethernet-HOWTO, available from <http://www.tldp.org/docs.html#howto>. @@ -1208,7 +1208,7 @@ config IBM_EMAC_RX_SKB_HEADROOM help Additional receive skb headroom. Note, that driver will always reserve at least 2 bytes to make IP header - aligned, so usualy there is no need to add any additional + aligned, so usually there is no need to add any additional headroom. If unsure, set to 0. @@ -1372,8 +1372,8 @@ config B44 called b44. config FORCEDETH - tristate "Reverse Engineered nForce Ethernet support (EXPERIMENTAL)" - depends on NET_PCI && PCI && EXPERIMENTAL + tristate "nForce Ethernet support" + depends on NET_PCI && PCI help If you have a network (Ethernet) controller of this type, say Y and read the Ethernet-HOWTO, available from @@ -1614,11 +1614,7 @@ config SIS900 ---help--- This is a driver for the Fast Ethernet PCI network cards based on the SiS 900 and SiS 7016 chips. The SiS 900 core is also embedded in - SiS 630 and SiS 540 chipsets. If you have one of those, say Y and - read the Ethernet-HOWTO, available at - <http://www.tldp.org/docs.html#howto>. Please read - <file:Documentation/networking/sis900.txt> and comments at the - beginning of <file:drivers/net/sis900.c> for more information. + SiS 630 and SiS 540 chipsets. This driver also supports AMD 79C901 HomePNA so that you can use your phone line as a network cable. @@ -1934,7 +1930,7 @@ config MYRI_SBUS will be called myri_sbus. This is recommended. config NS83820 - tristate "National Semiconduct DP83820 support" + tristate "National Semiconductor DP83820 support" depends on PCI help This is a driver for the National Semiconductor DP83820 series @@ -2195,6 +2191,7 @@ config GFAR_NAPI config MV643XX_ETH tristate "MV-643XX Ethernet support" depends on MOMENCO_OCELOT_C || MOMENCO_JAGUAR_ATX || MV64360 || MOMENCO_OCELOT_3 || PPC_MULTIPLATFORM + select MII help This driver supports the gigabit Ethernet on the Marvell MV643XX chipset which is used in the Momenco Ocelot C and Jaguar ATX and @@ -2514,7 +2511,7 @@ config PPP_FILTER Say Y here if you want to be able to filter the packets passing over PPP interfaces. This allows you to control which packets count as activity (i.e. which packets will reset the idle timer or bring up - a demand-dialled link) and which packets are to be dropped entirely. + a demand-dialed link) and which packets are to be dropped entirely. You need to say Y here if you wish to use the pass-filter and active-filter options to pppd. @@ -2702,8 +2699,8 @@ config SHAPER <file:Documentation/networking/shaper.txt> for more information. An alternative to this traffic shaper is the experimental - Class-Based Queueing (CBQ) scheduling support which you get if you - say Y to "QoS and/or fair queueing" above. + Class-Based Queuing (CBQ) scheduling support which you get if you + say Y to "QoS and/or fair queuing" above. To compile this driver as a module, choose M here: the module will be called shaper. If unsure, say N. diff --git a/drivers/net/apne.c b/drivers/net/apne.c index a94216b..b9820b8 100644 --- a/drivers/net/apne.c +++ b/drivers/net/apne.c @@ -36,6 +36,7 @@ #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/io.h> @@ -216,7 +217,7 @@ static int __init apne_probe1(struct net_device *dev, int ioaddr) outb(inb(ioaddr + NE_RESET), ioaddr + NE_RESET); while ((inb(ioaddr + NE_EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk(" not found (no reset ack).\n"); return -ENODEV; } @@ -382,7 +383,7 @@ apne_reset_8390(struct net_device *dev) /* This check _should_not_ be necessary, omit eventually. */ while ((inb(NE_BASE+NE_EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk("%s: ne_reset_8390() did not complete.\n", dev->name); break; } @@ -530,7 +531,7 @@ apne_block_output(struct net_device *dev, int count, dma_start = jiffies; while ((inb(NE_BASE + NE_EN0_ISR) & ENISR_RDC) == 0) - if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ + if (time_after(jiffies, dma_start + 2*HZ/100)) { /* 20ms */ printk("%s: timeout waiting for Tx RDC.\n", dev->name); apne_reset_8390(dev); NS8390_init(dev,1); diff --git a/drivers/net/arcnet/Kconfig b/drivers/net/arcnet/Kconfig index 948de25..7284cca 100644 --- a/drivers/net/arcnet/Kconfig +++ b/drivers/net/arcnet/Kconfig @@ -68,10 +68,10 @@ config ARCNET_CAP packet is stuffed with an extra 4 byte "cookie" which doesn't actually appear on the network. After transmit the driver will send back a packet with protocol byte 0 containing the status of the - transmition: + transmission: 0=no hardware acknowledge 1=excessive nak - 2=transmition accepted by the reciever hardware + 2=transmission accepted by the receiver hardware Received packets are also stuffed with the extra 4 bytes but it will be random data. diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index e1ea29b..e7555d4 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -42,7 +42,7 @@ static int build_header(struct sk_buff *skb, struct net_device *dev, static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum); -struct ArcProto rawmode_proto = +static struct ArcProto rawmode_proto = { .suffix = 'r', .mtu = XMTU, diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 38c3f03..8c8d6c4 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -97,25 +97,44 @@ static int __init arcrimi_probe(struct net_device *dev) "must specify the shmem and irq!\n"); return -ENODEV; } + if (dev->dev_addr[0] == 0) { + BUGMSG(D_NORMAL, "You need to specify your card's station " + "ID!\n"); + return -ENODEV; + } /* - * Grab the memory region at mem_start for BUFFER_SIZE bytes. + * Grab the memory region at mem_start for MIRROR_SIZE bytes. * Later in arcrimi_found() the real size will be determined * and this reserve will be released and the correct size * will be taken. */ - if (!request_mem_region(dev->mem_start, BUFFER_SIZE, "arcnet (90xx)")) { + if (!request_mem_region(dev->mem_start, MIRROR_SIZE, "arcnet (90xx)")) { BUGMSG(D_NORMAL, "Card memory already allocated\n"); return -ENODEV; } - if (dev->dev_addr[0] == 0) { - release_mem_region(dev->mem_start, BUFFER_SIZE); - BUGMSG(D_NORMAL, "You need to specify your card's station " - "ID!\n"); - return -ENODEV; - } return arcrimi_found(dev); } +static int check_mirror(unsigned long addr, size_t size) +{ + void __iomem *p; + int res = -1; + + if (!request_mem_region(addr, size, "arcnet (90xx)")) + return -1; + + p = ioremap(addr, size); + if (p) { + if (readb(p) == TESTvalue) + res = 1; + else + res = 0; + iounmap(p); + } + + release_mem_region(addr, size); + return res; +} /* * Set up the struct net_device associated with this card. Called after @@ -125,19 +144,28 @@ static int __init arcrimi_found(struct net_device *dev) { struct arcnet_local *lp; unsigned long first_mirror, last_mirror, shmem; + void __iomem *p; int mirror_size; int err; + p = ioremap(dev->mem_start, MIRROR_SIZE); + if (!p) { + release_mem_region(dev->mem_start, MIRROR_SIZE); + BUGMSG(D_NORMAL, "Can't ioremap\n"); + return -ENODEV; + } + /* reserve the irq */ if (request_irq(dev->irq, &arcnet_interrupt, 0, "arcnet (RIM I)", dev)) { - release_mem_region(dev->mem_start, BUFFER_SIZE); + iounmap(p); + release_mem_region(dev->mem_start, MIRROR_SIZE); BUGMSG(D_NORMAL, "Can't get IRQ %d!\n", dev->irq); return -ENODEV; } shmem = dev->mem_start; - isa_writeb(TESTvalue, shmem); - isa_writeb(dev->dev_addr[0], shmem + 1); /* actually the node ID */ + writeb(TESTvalue, p); + writeb(dev->dev_addr[0], p + 1); /* actually the node ID */ /* find the real shared memory start/end points, including mirrors */ @@ -146,17 +174,18 @@ static int __init arcrimi_found(struct net_device *dev) * 2k (or there are no mirrors at all) but on some, it's 4k. */ mirror_size = MIRROR_SIZE; - if (isa_readb(shmem) == TESTvalue - && isa_readb(shmem - mirror_size) != TESTvalue - && isa_readb(shmem - 2 * mirror_size) == TESTvalue) - mirror_size *= 2; + if (readb(p) == TESTvalue + && check_mirror(shmem - MIRROR_SIZE, MIRROR_SIZE) == 0 + && check_mirror(shmem - 2 * MIRROR_SIZE, MIRROR_SIZE) == 1) + mirror_size = 2 * MIRROR_SIZE; - first_mirror = last_mirror = shmem; - while (isa_readb(first_mirror) == TESTvalue) + first_mirror = shmem - mirror_size; + while (check_mirror(first_mirror, mirror_size) == 1) first_mirror -= mirror_size; first_mirror += mirror_size; - while (isa_readb(last_mirror) == TESTvalue) + last_mirror = shmem + mirror_size; + while (check_mirror(last_mirror, mirror_size) == 1) last_mirror += mirror_size; last_mirror -= mirror_size; @@ -181,7 +210,8 @@ static int __init arcrimi_found(struct net_device *dev) * with the correct size. There is a VERY slim chance this could * fail. */ - release_mem_region(shmem, BUFFER_SIZE); + iounmap(p); + release_mem_region(shmem, MIRROR_SIZE); if (!request_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1, "arcnet (90xx)")) { diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 12ef52c..64e2caf 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -52,6 +52,7 @@ #include <net/arp.h> #include <linux/init.h> #include <linux/arcdevice.h> +#include <linux/jiffies.h> /* "do nothing" functions for protocol drivers */ static void null_rx(struct net_device *dev, int bufnum, @@ -61,6 +62,7 @@ static int null_build_header(struct sk_buff *skb, struct net_device *dev, static int null_prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum); +static void arcnet_rx(struct net_device *dev, int bufnum); /* * one ArcProto per possible proto ID. None of the elements of @@ -71,7 +73,7 @@ static int null_prepare_tx(struct net_device *dev, struct archdr *pkt, struct ArcProto *arc_proto_map[256], *arc_proto_default, *arc_bcast_proto, *arc_raw_proto; -struct ArcProto arc_proto_null = +static struct ArcProto arc_proto_null = { .suffix = '?', .mtu = XMTU, @@ -90,7 +92,6 @@ EXPORT_SYMBOL(arc_proto_map); EXPORT_SYMBOL(arc_proto_default); EXPORT_SYMBOL(arc_bcast_proto); EXPORT_SYMBOL(arc_raw_proto); -EXPORT_SYMBOL(arc_proto_null); EXPORT_SYMBOL(arcnet_unregister_proto); EXPORT_SYMBOL(arcnet_debug); EXPORT_SYMBOL(alloc_arcdev); @@ -118,7 +119,7 @@ static int __init arcnet_init(void) arcnet_debug = debug; - printk(VERSION); + printk("arcnet loaded.\n"); #ifdef ALPHA_WARNING BUGLVL(D_EXTRA) { @@ -178,8 +179,8 @@ EXPORT_SYMBOL(arcnet_dump_skb); * Dump the contents of an ARCnet buffer */ #if (ARCNET_DEBUG_MAX & (D_RX | D_TX)) -void arcnet_dump_packet(struct net_device *dev, int bufnum, char *desc, - int take_arcnet_lock) +static void arcnet_dump_packet(struct net_device *dev, int bufnum, + char *desc, int take_arcnet_lock) { struct arcnet_local *lp = dev->priv; int i, length; @@ -208,7 +209,10 @@ void arcnet_dump_packet(struct net_device *dev, int bufnum, char *desc, } -EXPORT_SYMBOL(arcnet_dump_packet); +#else + +#define arcnet_dump_packet(dev, bufnum, desc,take_arcnet_lock) do { } while (0) + #endif @@ -733,7 +737,7 @@ static void arcnet_timeout(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); - if (jiffies - lp->last_timeout > 10*HZ) { + if (time_after(jiffies, lp->last_timeout + 10*HZ)) { BUGMSG(D_EXTRA, "tx timed out%s (status=%Xh, intmask=%Xh, dest=%02Xh)\n", msg, status, lp->intmask, lp->lasttrans_dest); lp->last_timeout = jiffies; @@ -996,7 +1000,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id, struct pt_regs *regs) * This is a generic packet receiver that calls arcnet??_rx depending on the * protocol ID found. */ -void arcnet_rx(struct net_device *dev, int bufnum) +static void arcnet_rx(struct net_device *dev, int bufnum) { struct arcnet_local *lp = dev->priv; struct archdr pkt; diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 6c2c9b9..43150b2 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -53,7 +53,7 @@ /* Internal function declarations */ -static int com90xx_found(int ioaddr, int airq, u_long shmem); +static int com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem *); static void com90xx_command(struct net_device *dev, int command); static int com90xx_status(struct net_device *dev); static void com90xx_setmask(struct net_device *dev, int mask); @@ -116,14 +116,26 @@ static void __init com90xx_probe(void) unsigned long airqmask; int ports[(0x3f0 - 0x200) / 16 + 1] = {0}; - u_long shmems[(0xFF800 - 0xA0000) / 2048 + 1] = - {0}; + unsigned long *shmems; + void __iomem **iomem; int numports, numshmems, *port; u_long *p; + int index; if (!io && !irq && !shmem && !*device && com90xx_skip_probe) return; + shmems = kzalloc(((0x10000-0xa0000) / 0x800) * sizeof(unsigned long), + GFP_KERNEL); + if (!shmems) + return; + iomem = kzalloc(((0x10000-0xa0000) / 0x800) * sizeof(void __iomem *), + GFP_KERNEL); + if (!iomem) { + kfree(shmems); + return; + } + BUGLVL(D_NORMAL) printk(VERSION); /* set up the arrays where we'll store the possible probe addresses */ @@ -179,6 +191,8 @@ static void __init com90xx_probe(void) if (!numports) { BUGMSG2(D_NORMAL, "S1: No ARCnet cards found.\n"); + kfree(shmems); + kfree(iomem); return; } /* Stage 2: we have now reset any possible ARCnet cards, so we can't @@ -202,8 +216,8 @@ static void __init com90xx_probe(void) * 0xD1 byte in the right place, or are read-only. */ numprint = -1; - for (p = &shmems[0]; p < shmems + numshmems; p++) { - u_long ptr = *p; + for (index = 0, p = &shmems[0]; index < numshmems; p++, index++) { + void __iomem *base; numprint++; numprint %= 8; @@ -213,38 +227,49 @@ static void __init com90xx_probe(void) } BUGMSG2(D_INIT, "%lXh ", *p); - if (!request_mem_region(*p, BUFFER_SIZE, "arcnet (90xx)")) { + if (!request_mem_region(*p, MIRROR_SIZE, "arcnet (90xx)")) { BUGMSG2(D_INIT_REASONS, "(request_mem_region)\n"); BUGMSG2(D_INIT_REASONS, "Stage 3: "); BUGLVL(D_INIT_REASONS) numprint = 0; - *p-- = shmems[--numshmems]; - continue; + goto out; + } + base = ioremap(*p, MIRROR_SIZE); + if (!base) { + BUGMSG2(D_INIT_REASONS, "(ioremap)\n"); + BUGMSG2(D_INIT_REASONS, "Stage 3: "); + BUGLVL(D_INIT_REASONS) numprint = 0; + goto out1; } - if (isa_readb(ptr) != TESTvalue) { + if (readb(base) != TESTvalue) { BUGMSG2(D_INIT_REASONS, "(%02Xh != %02Xh)\n", - isa_readb(ptr), TESTvalue); + readb(base), TESTvalue); BUGMSG2(D_INIT_REASONS, "S3: "); BUGLVL(D_INIT_REASONS) numprint = 0; - release_mem_region(*p, BUFFER_SIZE); - *p-- = shmems[--numshmems]; - continue; + goto out2; } /* By writing 0x42 to the TESTvalue location, we also make * sure no "mirror" shmem areas show up - if they occur * in another pass through this loop, they will be discarded * because *cptr != TESTvalue. */ - isa_writeb(0x42, ptr); - if (isa_readb(ptr) != 0x42) { + writeb(0x42, base); + if (readb(base) != 0x42) { BUGMSG2(D_INIT_REASONS, "(read only)\n"); BUGMSG2(D_INIT_REASONS, "S3: "); - release_mem_region(*p, BUFFER_SIZE); - *p-- = shmems[--numshmems]; - continue; + goto out2; } BUGMSG2(D_INIT_REASONS, "\n"); BUGMSG2(D_INIT_REASONS, "S3: "); BUGLVL(D_INIT_REASONS) numprint = 0; + iomem[index] = base; + continue; + out2: + iounmap(base); + out1: + release_mem_region(*p, MIRROR_SIZE); + out: + *p-- = shmems[--numshmems]; + index--; } BUGMSG2(D_INIT, "\n"); @@ -252,6 +277,8 @@ static void __init com90xx_probe(void) BUGMSG2(D_NORMAL, "S3: No ARCnet cards found.\n"); for (port = &ports[0]; port < ports + numports; port++) release_region(*port, ARCNET_TOTAL_SIZE); + kfree(shmems); + kfree(iomem); return; } /* Stage 4: something of a dummy, to report the shmems that are @@ -351,30 +378,32 @@ static void __init com90xx_probe(void) mdelay(RESETtime); } else { /* just one shmem and port, assume they match */ - isa_writeb(TESTvalue, shmems[0]); + writeb(TESTvalue, iomem[0]); } #else inb(_RESET); mdelay(RESETtime); #endif - for (p = &shmems[0]; p < shmems + numshmems; p++) { - u_long ptr = *p; + for (index = 0; index < numshmems; index++) { + u_long ptr = shmems[index]; + void __iomem *base = iomem[index]; - if (isa_readb(ptr) == TESTvalue) { /* found one */ + if (readb(base) == TESTvalue) { /* found one */ BUGMSG2(D_INIT, "%lXh)\n", *p); openparen = 0; /* register the card */ - if (com90xx_found(*port, airq, *p) == 0) + if (com90xx_found(*port, airq, ptr, base) == 0) found = 1; numprint = -1; /* remove shmem from the list */ - *p = shmems[--numshmems]; + shmems[index] = shmems[--numshmems]; + iomem[index] = iomem[numshmems]; break; /* go to the next I/O port */ } else { - BUGMSG2(D_INIT_REASONS, "%Xh-", isa_readb(ptr)); + BUGMSG2(D_INIT_REASONS, "%Xh-", readb(base)); } } @@ -391,17 +420,40 @@ static void __init com90xx_probe(void) BUGLVL(D_INIT_REASONS) printk("\n"); /* Now put back TESTvalue on all leftover shmems. */ - for (p = &shmems[0]; p < shmems + numshmems; p++) { - isa_writeb(TESTvalue, *p); - release_mem_region(*p, BUFFER_SIZE); + for (index = 0; index < numshmems; index++) { + writeb(TESTvalue, iomem[index]); + iounmap(iomem[index]); + release_mem_region(shmems[index], MIRROR_SIZE); } + kfree(shmems); + kfree(iomem); } +static int check_mirror(unsigned long addr, size_t size) +{ + void __iomem *p; + int res = -1; + + if (!request_mem_region(addr, size, "arcnet (90xx)")) + return -1; + + p = ioremap(addr, size); + if (p) { + if (readb(p) == TESTvalue) + res = 1; + else + res = 0; + iounmap(p); + } + + release_mem_region(addr, size); + return res; +} /* Set up the struct net_device associated with this card. Called after * probing succeeds. */ -static int __init com90xx_found(int ioaddr, int airq, u_long shmem) +static int __init com90xx_found(int ioaddr, int airq, u_long shmem, void __iomem *p) { struct net_device *dev = NULL; struct arcnet_local *lp; @@ -412,7 +464,8 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem) dev = alloc_arcdev(device); if (!dev) { BUGMSG2(D_NORMAL, "com90xx: Can't allocate device!\n"); - release_mem_region(shmem, BUFFER_SIZE); + iounmap(p); + release_mem_region(shmem, MIRROR_SIZE); return -ENOMEM; } lp = dev->priv; @@ -423,24 +476,27 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem) * 2k (or there are no mirrors at all) but on some, it's 4k. */ mirror_size = MIRROR_SIZE; - if (isa_readb(shmem) == TESTvalue - && isa_readb(shmem - mirror_size) != TESTvalue - && isa_readb(shmem - 2 * mirror_size) == TESTvalue) - mirror_size *= 2; + if (readb(p) == TESTvalue && + check_mirror(shmem - MIRROR_SIZE, MIRROR_SIZE) == 0 && + check_mirror(shmem - 2 * MIRROR_SIZE, MIRROR_SIZE) == 1) + mirror_size = 2 * MIRROR_SIZE; - first_mirror = last_mirror = shmem; - while (isa_readb(first_mirror) == TESTvalue) + first_mirror = shmem - mirror_size; + while (check_mirror(first_mirror, mirror_size) == 1) first_mirror -= mirror_size; first_mirror += mirror_size; - while (isa_readb(last_mirror) == TESTvalue) + last_mirror = shmem + mirror_size; + while (check_mirror(last_mirror, mirror_size) == 1) last_mirror += mirror_size; last_mirror -= mirror_size; dev->mem_start = first_mirror; dev->mem_end = last_mirror + MIRROR_SIZE - 1; - release_mem_region(shmem, BUFFER_SIZE); + iounmap(p); + release_mem_region(shmem, MIRROR_SIZE); + if (!request_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1, "arcnet (90xx)")) goto err_free_dev; diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index 6d79137..6d6c69f 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -43,7 +43,7 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum); -struct ArcProto rfc1051_proto = +static struct ArcProto rfc1051_proto = { .suffix = 's', .mtu = XMTU - RFC1051_HDR_SIZE, diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index 6b6ae4b..bee3422 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -43,7 +43,7 @@ static int prepare_tx(struct net_device *dev, struct archdr *pkt, int length, int bufnum); static int continue_tx(struct net_device *dev, int bufnum); -struct ArcProto rfc1201_proto = +static struct ArcProto rfc1201_proto = { .suffix = 'a', .mtu = 1500, /* could be more, but some receivers can't handle it... */ diff --git a/drivers/net/arm/etherh.c b/drivers/net/arm/etherh.c index 6a93b66..d52deb8 100644 --- a/drivers/net/arm/etherh.c +++ b/drivers/net/arm/etherh.c @@ -46,6 +46,7 @@ #include <linux/device.h> #include <linux/init.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/ecard.h> @@ -355,7 +356,7 @@ etherh_block_output (struct net_device *dev, int count, const unsigned char *buf dma_start = jiffies; while ((readb (addr + EN0_ISR) & ENISR_RDC) == 0) - if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ + if (time_after(jiffies, dma_start + 2*HZ/100)) { /* 20ms */ printk(KERN_ERR "%s: timeout waiting for TX RDC\n", dev->name); etherh_reset (dev); diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index a24200d..b787b65 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -46,7 +46,7 @@ typedef enum { } board_t; /* indexed by board_t, above */ -static struct { +static const struct { char *name; } board_info[] __devinitdata = { { "Broadcom NetXtreme II BCM5706 1000Base-T" }, @@ -3476,7 +3476,7 @@ bnx2_test_registers(struct bnx2 *bp) { int ret; int i; - static struct { + static const struct { u16 offset; u16 flags; u32 rw_mask; @@ -3891,7 +3891,7 @@ reg_test_err: static int bnx2_do_mem_test(struct bnx2 *bp, u32 start, u32 size) { - static u32 test_pattern[] = { 0x00000000, 0xffffffff, 0x55555555, + static const u32 test_pattern[] = { 0x00000000, 0xffffffff, 0x55555555, 0xaaaaaaaa , 0xaa55aa55, 0x55aa55aa }; int i; @@ -3916,7 +3916,7 @@ bnx2_test_memory(struct bnx2 *bp) { int ret = 0; int i; - static struct { + static const struct { u32 offset; u32 len; } mem_tbl[] = { @@ -5122,7 +5122,7 @@ static struct { #define STATS_OFFSET32(offset_name) (offsetof(struct statistics_block, offset_name) / 4) -static unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { +static const unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { STATS_OFFSET32(stat_IfHCInOctets_hi), STATS_OFFSET32(stat_IfHCInBadOctets_hi), STATS_OFFSET32(stat_IfHCOutOctets_hi), diff --git a/drivers/net/bnx2_fw.h b/drivers/net/bnx2_fw.h index 0c21bd8..8158974 100644 --- a/drivers/net/bnx2_fw.h +++ b/drivers/net/bnx2_fw.h @@ -14,20 +14,20 @@ * accompanying it. */ -static int bnx2_COM_b06FwReleaseMajor = 0x1; -static int bnx2_COM_b06FwReleaseMinor = 0x0; -static int bnx2_COM_b06FwReleaseFix = 0x0; -static u32 bnx2_COM_b06FwStartAddr = 0x080008b4; -static u32 bnx2_COM_b06FwTextAddr = 0x08000000; -static int bnx2_COM_b06FwTextLen = 0x57bc; -static u32 bnx2_COM_b06FwDataAddr = 0x08005840; -static int bnx2_COM_b06FwDataLen = 0x0; -static u32 bnx2_COM_b06FwRodataAddr = 0x080057c0; -static int bnx2_COM_b06FwRodataLen = 0x58; -static u32 bnx2_COM_b06FwBssAddr = 0x08005860; -static int bnx2_COM_b06FwBssLen = 0x88; -static u32 bnx2_COM_b06FwSbssAddr = 0x08005840; -static int bnx2_COM_b06FwSbssLen = 0x1c; +static const int bnx2_COM_b06FwReleaseMajor = 0x1; +static const int bnx2_COM_b06FwReleaseMinor = 0x0; +static const int bnx2_COM_b06FwReleaseFix = 0x0; +static const u32 bnx2_COM_b06FwStartAddr = 0x080008b4; +static const u32 bnx2_COM_b06FwTextAddr = 0x08000000; +static const int bnx2_COM_b06FwTextLen = 0x57bc; +static const u32 bnx2_COM_b06FwDataAddr = 0x08005840; +static const int bnx2_COM_b06FwDataLen = 0x0; +static const u32 bnx2_COM_b06FwRodataAddr = 0x080057c0; +static const int bnx2_COM_b06FwRodataLen = 0x58; +static const u32 bnx2_COM_b06FwBssAddr = 0x08005860; +static const int bnx2_COM_b06FwBssLen = 0x88; +static const u32 bnx2_COM_b06FwSbssAddr = 0x08005840; +static const int bnx2_COM_b06FwSbssLen = 0x1c; static u32 bnx2_COM_b06FwText[(0x57bc/4) + 1] = { 0x0a00022d, 0x00000000, 0x00000000, 0x0000000d, 0x636f6d20, 0x322e352e, 0x38000000, 0x02050802, 0x00000000, 0x00000003, 0x00000014, 0x00000032, @@ -2325,20 +2325,20 @@ static u32 bnx2_rv2p_proc2[] = { 0x0000000c, 0x29520000, 0x00000018, 0x80000002, 0x0000000c, 0x29800000, 0x00000018, 0x00570000 }; -static int bnx2_TPAT_b06FwReleaseMajor = 0x1; -static int bnx2_TPAT_b06FwReleaseMinor = 0x0; -static int bnx2_TPAT_b06FwReleaseFix = 0x0; -static u32 bnx2_TPAT_b06FwStartAddr = 0x08000860; -static u32 bnx2_TPAT_b06FwTextAddr = 0x08000800; -static int bnx2_TPAT_b06FwTextLen = 0x122c; -static u32 bnx2_TPAT_b06FwDataAddr = 0x08001a60; -static int bnx2_TPAT_b06FwDataLen = 0x0; -static u32 bnx2_TPAT_b06FwRodataAddr = 0x00000000; -static int bnx2_TPAT_b06FwRodataLen = 0x0; -static u32 bnx2_TPAT_b06FwBssAddr = 0x08001aa0; -static int bnx2_TPAT_b06FwBssLen = 0x250; -static u32 bnx2_TPAT_b06FwSbssAddr = 0x08001a60; -static int bnx2_TPAT_b06FwSbssLen = 0x34; +static const int bnx2_TPAT_b06FwReleaseMajor = 0x1; +static const int bnx2_TPAT_b06FwReleaseMinor = 0x0; +static const int bnx2_TPAT_b06FwReleaseFix = 0x0; +static const u32 bnx2_TPAT_b06FwStartAddr = 0x08000860; +static const u32 bnx2_TPAT_b06FwTextAddr = 0x08000800; +static const int bnx2_TPAT_b06FwTextLen = 0x122c; +static const u32 bnx2_TPAT_b06FwDataAddr = 0x08001a60; +static const int bnx2_TPAT_b06FwDataLen = 0x0; +static const u32 bnx2_TPAT_b06FwRodataAddr = 0x00000000; +static const int bnx2_TPAT_b06FwRodataLen = 0x0; +static const u32 bnx2_TPAT_b06FwBssAddr = 0x08001aa0; +static const int bnx2_TPAT_b06FwBssLen = 0x250; +static const u32 bnx2_TPAT_b06FwSbssAddr = 0x08001a60; +static const int bnx2_TPAT_b06FwSbssLen = 0x34; static u32 bnx2_TPAT_b06FwText[(0x122c/4) + 1] = { 0x0a000218, 0x00000000, 0x00000000, 0x0000000d, 0x74706174, 0x20322e35, 0x2e313100, 0x02050b01, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -2540,20 +2540,20 @@ static u32 bnx2_TPAT_b06FwRodata[(0x0/4) + 1] = { 0x0 }; static u32 bnx2_TPAT_b06FwBss[(0x250/4) + 1] = { 0x0 }; static u32 bnx2_TPAT_b06FwSbss[(0x34/4) + 1] = { 0x0 }; -static int bnx2_TXP_b06FwReleaseMajor = 0x1; -static int bnx2_TXP_b06FwReleaseMinor = 0x0; -static int bnx2_TXP_b06FwReleaseFix = 0x0; -static u32 bnx2_TXP_b06FwStartAddr = 0x080034b0; -static u32 bnx2_TXP_b06FwTextAddr = 0x08000000; -static int bnx2_TXP_b06FwTextLen = 0x5748; -static u32 bnx2_TXP_b06FwDataAddr = 0x08005760; -static int bnx2_TXP_b06FwDataLen = 0x0; -static u32 bnx2_TXP_b06FwRodataAddr = 0x00000000; -static int bnx2_TXP_b06FwRodataLen = 0x0; -static u32 bnx2_TXP_b06FwBssAddr = 0x080057a0; -static int bnx2_TXP_b06FwBssLen = 0x1c4; -static u32 bnx2_TXP_b06FwSbssAddr = 0x08005760; -static int bnx2_TXP_b06FwSbssLen = 0x38; +static const int bnx2_TXP_b06FwReleaseMajor = 0x1; +static const int bnx2_TXP_b06FwReleaseMinor = 0x0; +static const int bnx2_TXP_b06FwReleaseFix = 0x0; +static const u32 bnx2_TXP_b06FwStartAddr = 0x080034b0; +static const u32 bnx2_TXP_b06FwTextAddr = 0x08000000; +static const int bnx2_TXP_b06FwTextLen = 0x5748; +static const u32 bnx2_TXP_b06FwDataAddr = 0x08005760; +static const int bnx2_TXP_b06FwDataLen = 0x0; +static const u32 bnx2_TXP_b06FwRodataAddr = 0x00000000; +static const int bnx2_TXP_b06FwRodataLen = 0x0; +static const u32 bnx2_TXP_b06FwBssAddr = 0x080057a0; +static const int bnx2_TXP_b06FwBssLen = 0x1c4; +static const u32 bnx2_TXP_b06FwSbssAddr = 0x08005760; +static const int bnx2_TXP_b06FwSbssLen = 0x38; static u32 bnx2_TXP_b06FwText[(0x5748/4) + 1] = { 0x0a000d2c, 0x00000000, 0x00000000, 0x0000000d, 0x74787020, 0x322e352e, 0x38000000, 0x02050800, 0x0000000a, 0x000003e8, 0x0000ea60, 0x00000000, diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index f2a6318..e83bc82 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1261,7 +1261,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct ethhdr *eth_data; struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct slave *tx_slave = NULL; - static u32 ip_bcast = 0xffffffff; + static const u32 ip_bcast = 0xffffffff; int hash_size = 0; int do_tx_balance = 1; u32 hash_index = 0; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bcf9f17..2d0ac16 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -131,7 +131,7 @@ MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); /*----------------------------- Global variables ----------------------------*/ -static const char *version = +static const char * const version = DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; LIST_HEAD(bond_dev_list); @@ -1040,6 +1040,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) if ((bond->params.mode == BOND_MODE_TLB) || (bond->params.mode == BOND_MODE_ALB)) { bond_alb_handle_active_change(bond, new_active); + if (old_active) + bond_set_slave_inactive_flags(old_active); + if (new_active) + bond_set_slave_active_flags(new_active); } else { bond->curr_active_slave = new_active; } @@ -1443,15 +1447,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) switch (bond->params.mode) { case BOND_MODE_ACTIVEBACKUP: - /* if we're in active-backup mode, we need one and only one active - * interface. The backup interfaces will have their NOARP flag set - * because we need them to be completely deaf and not to respond to - * any ARP request on the network to avoid fooling a switch. Thus, - * since we guarantee that curr_active_slave always point to the last - * usable interface, we just have to verify this interface's flag. + /* if we're in active-backup mode, we need one and + * only one active interface. The backup interfaces + * will have their SLAVE_INACTIVE flag set because we + * need them to be drop all packets. Thus, since we + * guarantee that curr_active_slave always point to + * the last usable interface, we just have to verify + * this interface's flag. */ if (((!bond->curr_active_slave) || - (bond->curr_active_slave->dev->flags & IFF_NOARP)) && + (bond->curr_active_slave->dev->priv_flags & IFF_SLAVE_INACTIVE)) && (new_slave->link != BOND_LINK_DOWN)) { dprintk("This is the first active slave\n"); /* first slave or no active slave yet, and this link @@ -1492,6 +1497,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) * is OK, so make this interface the active one */ bond_change_active_slave(bond, new_slave); + } else { + bond_set_slave_inactive_flags(new_slave); } break; default: @@ -1724,13 +1731,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) addr.sa_family = slave_dev->type; dev_set_mac_address(slave_dev, &addr); - /* restore the original state of the - * IFF_NOARP flag that might have been - * set by bond_set_slave_inactive_flags() - */ - if ((slave->original_flags & IFF_NOARP) == 0) { - slave_dev->flags &= ~IFF_NOARP; - } + slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | + IFF_SLAVE_INACTIVE); kfree(slave); @@ -1816,12 +1818,8 @@ static int bond_release_all(struct net_device *bond_dev) addr.sa_family = slave_dev->type; dev_set_mac_address(slave_dev, &addr); - /* restore the original state of the IFF_NOARP flag that might have - * been set by bond_set_slave_inactive_flags() - */ - if ((slave->original_flags & IFF_NOARP) == 0) { - slave_dev->flags &= ~IFF_NOARP; - } + slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | + IFF_SLAVE_INACTIVE); kfree(slave); @@ -4061,14 +4059,17 @@ void bond_set_mode_ops(struct bonding *bond, int mode) bond_dev->hard_start_xmit = bond_xmit_broadcast; break; case BOND_MODE_8023AD: + bond_set_master_3ad_flags(bond); bond_dev->hard_start_xmit = bond_3ad_xmit_xor; if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) bond->xmit_hash_policy = bond_xmit_hash_policy_l34; else bond->xmit_hash_policy = bond_xmit_hash_policy_l2; break; - case BOND_MODE_TLB: case BOND_MODE_ALB: + bond_set_master_alb_flags(bond); + /* FALLTHRU */ + case BOND_MODE_TLB: bond_dev->hard_start_xmit = bond_alb_xmit; bond_dev->set_mac_address = bond_alb_set_mac_address; break; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 041bcc5..5a9bd95 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -424,6 +424,12 @@ static ssize_t bonding_store_mode(struct class_device *cd, const char *buf, size ret = -EINVAL; goto out; } else { + if (bond->params.mode == BOND_MODE_8023AD) + bond_unset_master_3ad_flags(bond); + + if (bond->params.mode == BOND_MODE_ALB) + bond_unset_master_alb_flags(bond); + bond->params.mode = new_value; bond_set_mode_ops(bond, bond->params.mode); printk(KERN_INFO DRV_NAME ": %s: setting mode to %s (%d).\n", diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 3dd78d0..ce9dc9b 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -22,8 +22,8 @@ #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.0.1" -#define DRV_RELDATE "January 9, 2006" +#define DRV_VERSION "3.0.2" +#define DRV_RELDATE "February 21, 2006" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -230,14 +230,37 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) static inline void bond_set_slave_inactive_flags(struct slave *slave) { - slave->state = BOND_STATE_BACKUP; - slave->dev->flags |= IFF_NOARP; + struct bonding *bond = slave->dev->master->priv; + if (bond->params.mode != BOND_MODE_TLB && + bond->params.mode != BOND_MODE_ALB) + slave->state = BOND_STATE_BACKUP; + slave->dev->priv_flags |= IFF_SLAVE_INACTIVE; } static inline void bond_set_slave_active_flags(struct slave *slave) { slave->state = BOND_STATE_ACTIVE; - slave->dev->flags &= ~IFF_NOARP; + slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE; +} + +static inline void bond_set_master_3ad_flags(struct bonding *bond) +{ + bond->dev->priv_flags |= IFF_MASTER_8023AD; +} + +static inline void bond_unset_master_3ad_flags(struct bonding *bond) +{ + bond->dev->priv_flags &= ~IFF_MASTER_8023AD; +} + +static inline void bond_set_master_alb_flags(struct bonding *bond) +{ + bond->dev->priv_flags |= IFF_MASTER_ALB; +} + +static inline void bond_unset_master_alb_flags(struct bonding *bond) +{ + bond->dev->priv_flags &= ~IFF_MASTER_ALB; } struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); diff --git a/drivers/net/chelsio/espi.c b/drivers/net/chelsio/espi.c index e824aca..542e5e0 100644 --- a/drivers/net/chelsio/espi.c +++ b/drivers/net/chelsio/espi.c @@ -87,15 +87,9 @@ static int tricn_write(adapter_t *adapter, int bundle_addr, int module_addr, static int tricn_init(adapter_t *adapter) { int i = 0; - int sme = 1; int stat = 0; int timeout = 0; int is_ready = 0; - int dynamic_deskew = 0; - - if (dynamic_deskew) - sme = 0; - /* 1 */ timeout=1000; @@ -113,11 +107,9 @@ static int tricn_init(adapter_t *adapter) } /* 2 */ - if (sme) { - tricn_write(adapter, 0, 0, 0, TRICN_CNFG, 0x81); - tricn_write(adapter, 0, 1, 0, TRICN_CNFG, 0x81); - tricn_write(adapter, 0, 2, 0, TRICN_CNFG, 0x81); - } + tricn_write(adapter, 0, 0, 0, TRICN_CNFG, 0x81); + tricn_write(adapter, 0, 1, 0, TRICN_CNFG, 0x81); + tricn_write(adapter, 0, 2, 0, TRICN_CNFG, 0x81); for (i=1; i<= 8; i++) tricn_write(adapter, 0, 0, i, TRICN_CNFG, 0xf1); for (i=1; i<= 2; i++) tricn_write(adapter, 0, 1, i, TRICN_CNFG, 0xf1); for (i=1; i<= 3; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xe1); diff --git a/drivers/net/chelsio/subr.c b/drivers/net/chelsio/subr.c index 1ebb5d1..12e4e96 100644 --- a/drivers/net/chelsio/subr.c +++ b/drivers/net/chelsio/subr.c @@ -686,7 +686,7 @@ int t1_init_hw_modules(adapter_t *adapter) */ static void __devinit get_pci_mode(adapter_t *adapter, struct chelsio_pci_params *p) { - static unsigned short speed_map[] = { 33, 66, 100, 133 }; + static const unsigned short speed_map[] = { 33, 66, 100, 133 }; u32 pci_mode; pci_read_config_dword(adapter->pdev, A_PCICFG_MODE, &pci_mode); diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c index 70b47e4..32d13166 100644 --- a/drivers/net/dgrs.c +++ b/drivers/net/dgrs.c @@ -993,7 +993,7 @@ dgrs_download(struct net_device *dev0) int is; unsigned long i; - static int iv2is[16] = { + static const int iv2is[16] = { 0, 0, 0, ES4H_IS_INT3, 0, ES4H_IS_INT5, 0, ES4H_IS_INT7, 0, 0, ES4H_IS_INT10, ES4H_IS_INT11, diff --git a/drivers/net/dgrs_firmware.c b/drivers/net/dgrs_firmware.c index 1e49e1e..8c20d4c 100644 --- a/drivers/net/dgrs_firmware.c +++ b/drivers/net/dgrs_firmware.c @@ -1,4 +1,4 @@ -static int dgrs_firmnum = 550; +static const int dgrs_firmnum = 550; static char dgrs_firmver[] = "$Version$"; static char dgrs_firmdate[] = "11/16/96 03:45:15"; static unsigned char dgrs_code[] __initdata = { @@ -9963,4 +9963,4 @@ static unsigned char dgrs_code[] __initdata = { 109,46,99,0,114,99,0,0,48,120,0,0, 0,0,0,0,0,0,0,0,0,0,0,0 } ; -static int dgrs_ncode = 119520 ; +static const int dgrs_ncode = 119520 ; diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index fb9dae3..1f36274 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -90,8 +90,8 @@ module_param(tx_coalesce, int, 0); /* HW xmit count each TxDMAComplete */ #define EnableInt() \ writew(DEFAULT_INTR, ioaddr + IntEnable) -static int max_intrloop = 50; -static int multicast_filter_limit = 0x40; +static const int max_intrloop = 50; +static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); static void rio_timer (unsigned long data); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index f57a85f..31ac001 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -598,8 +598,8 @@ static void e100_enable_irq(struct nic *nic) spin_lock_irqsave(&nic->cmd_lock, flags); writeb(irq_mask_none, &nic->csr->scb.cmd_hi); - spin_unlock_irqrestore(&nic->cmd_lock, flags); e100_write_flush(nic); + spin_unlock_irqrestore(&nic->cmd_lock, flags); } static void e100_disable_irq(struct nic *nic) @@ -608,8 +608,8 @@ static void e100_disable_irq(struct nic *nic) spin_lock_irqsave(&nic->cmd_lock, flags); writeb(irq_mask_all, &nic->csr->scb.cmd_hi); - spin_unlock_irqrestore(&nic->cmd_lock, flags); e100_write_flush(nic); + spin_unlock_irqrestore(&nic->cmd_lock, flags); } static void e100_hw_reset(struct nic *nic) @@ -1582,8 +1582,8 @@ static void e100_watchdog(unsigned long data) * interrupt mask bit and the SW Interrupt generation bit */ spin_lock_irq(&nic->cmd_lock); writeb(readb(&nic->csr->scb.cmd_hi) | irq_sw_gen,&nic->csr->scb.cmd_hi); - spin_unlock_irq(&nic->cmd_lock); e100_write_flush(nic); + spin_unlock_irq(&nic->cmd_lock); e100_update_stats(nic); e100_adjust_adaptive_ifs(nic, cmd.speed, cmd.duplex); diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index 99baf0e..281de41 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -83,10 +83,6 @@ struct e1000_adapter; #include "e1000_hw.h" -#ifdef CONFIG_E1000_MQ -#include <linux/cpu.h> -#include <linux/smp.h> -#endif #ifdef DBG #define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args) @@ -169,12 +165,6 @@ struct e1000_buffer { uint16_t next_to_watch; }; -#ifdef CONFIG_E1000_MQ -struct e1000_queue_stats { - uint64_t packets; - uint64_t bytes; -}; -#endif struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; }; struct e1000_ps_page_dma { uint64_t ps_page_dma[PS_PAGE_BUFFERS]; }; @@ -198,12 +188,7 @@ struct e1000_tx_ring { spinlock_t tx_lock; uint16_t tdh; uint16_t tdt; - boolean_t last_tx_tso; - -#ifdef CONFIG_E1000_MQ - struct e1000_queue_stats tx_stats; -#endif }; struct e1000_rx_ring { @@ -230,9 +215,6 @@ struct e1000_rx_ring { uint16_t rdh; uint16_t rdt; -#ifdef CONFIG_E1000_MQ - struct e1000_queue_stats rx_stats; -#endif }; #define E1000_DESC_UNUSED(R) \ @@ -260,6 +242,7 @@ struct e1000_adapter { uint32_t rx_buffer_len; uint32_t part_num; uint32_t wol; + uint32_t ksp3_port_a; uint32_t smartspeed; uint32_t en_mng_pt; uint16_t link_speed; @@ -269,8 +252,8 @@ struct e1000_adapter { spinlock_t tx_queue_lock; #endif atomic_t irq_sem; - struct work_struct tx_timeout_task; struct work_struct watchdog_task; + struct work_struct reset_task; uint8_t fc_autoneg; struct timer_list blink_timer; @@ -278,9 +261,6 @@ struct e1000_adapter { /* TX */ struct e1000_tx_ring *tx_ring; /* One per active queue */ -#ifdef CONFIG_E1000_MQ - struct e1000_tx_ring **cpu_tx_ring; /* per-cpu */ -#endif unsigned long tx_queue_len; uint32_t txd_cmd; uint32_t tx_int_delay; @@ -301,24 +281,19 @@ struct e1000_adapter { /* RX */ #ifdef CONFIG_E1000_NAPI boolean_t (*clean_rx) (struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring, - int *work_done, int work_to_do); + struct e1000_rx_ring *rx_ring, + int *work_done, int work_to_do); #else boolean_t (*clean_rx) (struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); + struct e1000_rx_ring *rx_ring); #endif void (*alloc_rx_buf) (struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring, - int cleaned_count); + struct e1000_rx_ring *rx_ring, + int cleaned_count); struct e1000_rx_ring *rx_ring; /* One per active queue */ #ifdef CONFIG_E1000_NAPI struct net_device *polling_netdev; /* One per active queue */ #endif -#ifdef CONFIG_E1000_MQ - struct net_device **cpu_netdev; /* per-cpu */ - struct call_async_data_struct rx_sched_call_data; - cpumask_t cpumask; -#endif int num_tx_queues; int num_rx_queues; @@ -353,10 +328,37 @@ struct e1000_adapter { struct e1000_rx_ring test_rx_ring; - u32 *config_space; + uint32_t *config_space; int msg_enable; #ifdef CONFIG_PCI_MSI boolean_t have_msi; #endif + /* to not mess up cache alignment, always add to the bottom */ + boolean_t txb2b; +#ifdef NETIF_F_TSO + boolean_t tso_force; +#endif }; + + +/* e1000_main.c */ +extern char e1000_driver_name[]; +extern char e1000_driver_version[]; +int e1000_up(struct e1000_adapter *adapter); +void e1000_down(struct e1000_adapter *adapter); +void e1000_reset(struct e1000_adapter *adapter); +int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); +void e1000_free_all_tx_resources(struct e1000_adapter *adapter); +int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); +void e1000_free_all_rx_resources(struct e1000_adapter *adapter); +void e1000_update_stats(struct e1000_adapter *adapter); +int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); + +/* e1000_ethtool.c */ +void e1000_set_ethtool_ops(struct net_device *netdev); + +/* e1000_param.c */ +void e1000_check_options(struct e1000_adapter *adapter); + + #endif /* _E1000_H_ */ diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index 5cedc81..ecccca3 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -32,19 +32,6 @@ #include <asm/uaccess.h> -extern char e1000_driver_name[]; -extern char e1000_driver_version[]; - -extern int e1000_up(struct e1000_adapter *adapter); -extern void e1000_down(struct e1000_adapter *adapter); -extern void e1000_reset(struct e1000_adapter *adapter); -extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); -extern int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); -extern int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); -extern void e1000_free_all_rx_resources(struct e1000_adapter *adapter); -extern void e1000_free_all_tx_resources(struct e1000_adapter *adapter); -extern void e1000_update_stats(struct e1000_adapter *adapter); - struct e1000_stats { char stat_string[ETH_GSTRING_LEN]; int sizeof_stat; @@ -60,7 +47,6 @@ static const struct e1000_stats e1000_gstrings_stats[] = { { "tx_bytes", E1000_STAT(net_stats.tx_bytes) }, { "rx_errors", E1000_STAT(net_stats.rx_errors) }, { "tx_errors", E1000_STAT(net_stats.tx_errors) }, - { "rx_dropped", E1000_STAT(net_stats.rx_dropped) }, { "tx_dropped", E1000_STAT(net_stats.tx_dropped) }, { "multicast", E1000_STAT(net_stats.multicast) }, { "collisions", E1000_STAT(net_stats.collisions) }, @@ -68,7 +54,6 @@ static const struct e1000_stats e1000_gstrings_stats[] = { { "rx_over_errors", E1000_STAT(net_stats.rx_over_errors) }, { "rx_crc_errors", E1000_STAT(net_stats.rx_crc_errors) }, { "rx_frame_errors", E1000_STAT(net_stats.rx_frame_errors) }, - { "rx_fifo_errors", E1000_STAT(net_stats.rx_fifo_errors) }, { "rx_no_buffer_count", E1000_STAT(stats.rnbc) }, { "rx_missed_errors", E1000_STAT(net_stats.rx_missed_errors) }, { "tx_aborted_errors", E1000_STAT(net_stats.tx_aborted_errors) }, @@ -97,14 +82,7 @@ static const struct e1000_stats e1000_gstrings_stats[] = { { "alloc_rx_buff_failed", E1000_STAT(alloc_rx_buff_failed) }, }; -#ifdef CONFIG_E1000_MQ -#define E1000_QUEUE_STATS_LEN \ - (((struct e1000_adapter *)netdev->priv)->num_tx_queues + \ - ((struct e1000_adapter *)netdev->priv)->num_rx_queues) \ - * (sizeof(struct e1000_queue_stats) / sizeof(uint64_t)) -#else #define E1000_QUEUE_STATS_LEN 0 -#endif #define E1000_GLOBAL_STATS_LEN \ sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats) #define E1000_STATS_LEN (E1000_GLOBAL_STATS_LEN + E1000_QUEUE_STATS_LEN) @@ -346,6 +324,9 @@ e1000_set_tso(struct net_device *netdev, uint32_t data) netdev->features |= NETIF_F_TSO; else netdev->features &= ~NETIF_F_TSO; + + DPRINTK(PROBE, INFO, "TSO is %s\n", data ? "Enabled" : "Disabled"); + adapter->tso_force = TRUE; return 0; } #endif /* NETIF_F_TSO */ @@ -594,6 +575,7 @@ e1000_get_drvinfo(struct net_device *netdev, case e1000_82571: case e1000_82572: case e1000_82573: + case e1000_80003es2lan: sprintf(firmware_version, "%d.%d-%d", (eeprom_data & 0xF000) >> 12, (eeprom_data & 0x0FF0) >> 4, @@ -642,6 +624,9 @@ e1000_set_ringparam(struct net_device *netdev, struct e1000_rx_ring *rxdr, *rx_old, *rx_new; int i, err, tx_ring_size, rx_ring_size; + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + tx_ring_size = sizeof(struct e1000_tx_ring) * adapter->num_tx_queues; rx_ring_size = sizeof(struct e1000_rx_ring) * adapter->num_rx_queues; @@ -669,9 +654,6 @@ e1000_set_ringparam(struct net_device *netdev, txdr = adapter->tx_ring; rxdr = adapter->rx_ring; - if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) - return -EINVAL; - rxdr->count = max(ring->rx_pending,(uint32_t)E1000_MIN_RXD); rxdr->count = min(rxdr->count,(uint32_t)(mac_type < e1000_82544 ? E1000_MAX_RXD : E1000_MAX_82544_RXD)); @@ -767,6 +749,7 @@ e1000_reg_test(struct e1000_adapter *adapter, uint64_t *data) /* there are several bits on newer hardware that are r/w */ case e1000_82571: case e1000_82572: + case e1000_80003es2lan: toggle = 0x7FFFF3FF; break; case e1000_82573: @@ -1256,6 +1239,10 @@ e1000_integrated_phy_loopback(struct e1000_adapter *adapter) e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x9140); /* autoneg off */ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x8140); + } else if (adapter->hw.phy_type == e1000_phy_gg82563) { + e1000_write_phy_reg(&adapter->hw, + GG82563_PHY_KMRN_MODE_CTRL, + 0x1CE); } /* force 1000, set loopback */ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, 0x4140); @@ -1325,6 +1312,7 @@ e1000_set_phy_loopback(struct e1000_adapter *adapter) case e1000_82571: case e1000_82572: case e1000_82573: + case e1000_80003es2lan: return e1000_integrated_phy_loopback(adapter); break; @@ -1405,6 +1393,11 @@ e1000_loopback_cleanup(struct e1000_adapter *adapter) case e1000_82546_rev_3: default: hw->autoneg = TRUE; + if (hw->phy_type == e1000_phy_gg82563) { + e1000_write_phy_reg(hw, + GG82563_PHY_KMRN_MODE_CTRL, + 0x180); + } e1000_read_phy_reg(hw, PHY_CTRL, &phy_reg); if (phy_reg & MII_CR_LOOPBACK) { phy_reg &= ~MII_CR_LOOPBACK; @@ -1640,10 +1633,26 @@ e1000_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) case E1000_DEV_ID_82546EB_QUAD_COPPER: case E1000_DEV_ID_82545EM_FIBER: case E1000_DEV_ID_82545EM_COPPER: + case E1000_DEV_ID_82546GB_QUAD_COPPER: wol->supported = 0; wol->wolopts = 0; return; + case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: + /* device id 10B5 port-A supports wol */ + if (!adapter->ksp3_port_a) { + wol->supported = 0; + return; + } + /* KSP3 does not suppport UCAST wake-ups for any interface */ + wol->supported = WAKE_MCAST | WAKE_BCAST | WAKE_MAGIC; + + if (adapter->wol & E1000_WUFC_EX) + DPRINTK(DRV, ERR, "Interface does not support " + "directed (unicast) frame wake-up packets\n"); + wol->wolopts = 0; + goto do_defaults; + case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82571EB_FIBER: @@ -1658,8 +1667,9 @@ e1000_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) default: wol->supported = WAKE_UCAST | WAKE_MCAST | WAKE_BCAST | WAKE_MAGIC; - wol->wolopts = 0; + +do_defaults: if (adapter->wol & E1000_WUFC_EX) wol->wolopts |= WAKE_UCAST; if (adapter->wol & E1000_WUFC_MC) @@ -1684,10 +1694,22 @@ e1000_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) case E1000_DEV_ID_82543GC_COPPER: case E1000_DEV_ID_82544EI_FIBER: case E1000_DEV_ID_82546EB_QUAD_COPPER: + case E1000_DEV_ID_82546GB_QUAD_COPPER: case E1000_DEV_ID_82545EM_FIBER: case E1000_DEV_ID_82545EM_COPPER: return wol->wolopts ? -EOPNOTSUPP : 0; + case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: + /* device id 10B5 port-A supports wol */ + if (!adapter->ksp3_port_a) + return wol->wolopts ? -EOPNOTSUPP : 0; + + if (wol->wolopts & WAKE_UCAST) { + DPRINTK(DRV, ERR, "Interface does not support " + "directed (unicast) frame wake-up packets\n"); + return -EOPNOTSUPP; + } + case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82571EB_FIBER: @@ -1799,11 +1821,6 @@ e1000_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, uint64_t *data) { struct e1000_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_E1000_MQ - uint64_t *queue_stat; - int stat_count = sizeof(struct e1000_queue_stats) / sizeof(uint64_t); - int j, k; -#endif int i; e1000_update_stats(adapter); @@ -1812,29 +1829,12 @@ e1000_get_ethtool_stats(struct net_device *netdev, data[i] = (e1000_gstrings_stats[i].sizeof_stat == sizeof(uint64_t)) ? *(uint64_t *)p : *(uint32_t *)p; } -#ifdef CONFIG_E1000_MQ - for (j = 0; j < adapter->num_tx_queues; j++) { - queue_stat = (uint64_t *)&adapter->tx_ring[j].tx_stats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - i += k; - } - for (j = 0; j < adapter->num_rx_queues; j++) { - queue_stat = (uint64_t *)&adapter->rx_ring[j].rx_stats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - i += k; - } -#endif /* BUG_ON(i != E1000_STATS_LEN); */ } static void e1000_get_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) { -#ifdef CONFIG_E1000_MQ - struct e1000_adapter *adapter = netdev_priv(netdev); -#endif uint8_t *p = data; int i; @@ -1849,20 +1849,6 @@ e1000_get_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } -#ifdef CONFIG_E1000_MQ - for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(p, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - sprintf(p, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - } -#endif /* BUG_ON(p - data != E1000_STATS_LEN * ETH_GSTRING_LEN); */ break; } diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index beeec0f..523c2c9 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -100,6 +100,8 @@ static void e1000_write_reg_io(struct e1000_hw *hw, uint32_t offset, #define E1000_WRITE_REG_IO(a, reg, val) \ e1000_write_reg_io((a), E1000_##reg, val) +static int32_t e1000_configure_kmrn_for_10_100(struct e1000_hw *hw); +static int32_t e1000_configure_kmrn_for_1000(struct e1000_hw *hw); /* IGP cable length table */ static const @@ -153,6 +155,11 @@ e1000_set_phy_type(struct e1000_hw *hw) hw->phy_type = e1000_phy_igp; break; } + case GG82563_E_PHY_ID: + if (hw->mac_type == e1000_80003es2lan) { + hw->phy_type = e1000_phy_gg82563; + break; + } /* Fall Through */ default: /* Should never have loaded on this device */ @@ -353,12 +360,19 @@ e1000_set_mac_type(struct e1000_hw *hw) case E1000_DEV_ID_82573L: hw->mac_type = e1000_82573; break; + case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: + case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: + hw->mac_type = e1000_80003es2lan; + break; default: /* Should never have loaded on this device */ return -E1000_ERR_MAC_TYPE; } switch(hw->mac_type) { + case e1000_80003es2lan: + hw->swfw_sync_present = TRUE; + /* fall through */ case e1000_82571: case e1000_82572: case e1000_82573: @@ -399,6 +413,7 @@ e1000_set_media_type(struct e1000_hw *hw) case E1000_DEV_ID_82546GB_SERDES: case E1000_DEV_ID_82571EB_SERDES: case E1000_DEV_ID_82572EI_SERDES: + case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: hw->media_type = e1000_media_type_internal_serdes; break; default: @@ -575,6 +590,7 @@ e1000_reset_hw(struct e1000_hw *hw) /* fall through */ case e1000_82571: case e1000_82572: + case e1000_80003es2lan: ret_val = e1000_get_auto_rd_done(hw); if(ret_val) /* We don't want to continue accessing MAC registers. */ @@ -641,6 +657,7 @@ e1000_init_hw(struct e1000_hw *hw) uint16_t cmd_mmrbc; uint16_t stat_mmrbc; uint32_t mta_size; + uint32_t reg_data; uint32_t ctrl_ext; DEBUGFUNC("e1000_init_hw"); @@ -739,6 +756,7 @@ e1000_init_hw(struct e1000_hw *hw) case e1000_82571: case e1000_82572: case e1000_82573: + case e1000_80003es2lan: ctrl |= E1000_TXDCTL_COUNT_DESC; break; } @@ -752,12 +770,34 @@ e1000_init_hw(struct e1000_hw *hw) switch (hw->mac_type) { default: break; + case e1000_80003es2lan: + /* Enable retransmit on late collisions */ + reg_data = E1000_READ_REG(hw, TCTL); + reg_data |= E1000_TCTL_RTLC; + E1000_WRITE_REG(hw, TCTL, reg_data); + + /* Configure Gigabit Carry Extend Padding */ + reg_data = E1000_READ_REG(hw, TCTL_EXT); + reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; + reg_data |= DEFAULT_80003ES2LAN_TCTL_EXT_GCEX; + E1000_WRITE_REG(hw, TCTL_EXT, reg_data); + + /* Configure Transmit Inter-Packet Gap */ + reg_data = E1000_READ_REG(hw, TIPG); + reg_data &= ~E1000_TIPG_IPGT_MASK; + reg_data |= DEFAULT_80003ES2LAN_TIPG_IPGT_1000; + E1000_WRITE_REG(hw, TIPG, reg_data); + + reg_data = E1000_READ_REG_ARRAY(hw, FFLT, 0x0001); + reg_data &= ~0x00100000; + E1000_WRITE_REG_ARRAY(hw, FFLT, 0x0001, reg_data); + /* Fall through */ case e1000_82571: case e1000_82572: ctrl = E1000_READ_REG(hw, TXDCTL1); - ctrl &= ~E1000_TXDCTL_WTHRESH; - ctrl |= E1000_TXDCTL_COUNT_DESC | E1000_TXDCTL_FULL_TX_DESC_WB; - ctrl |= (1 << 22); + ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; + if(hw->mac_type >= e1000_82571) + ctrl |= E1000_TXDCTL_COUNT_DESC; E1000_WRITE_REG(hw, TXDCTL1, ctrl); break; } @@ -906,7 +946,13 @@ e1000_setup_link(struct e1000_hw *hw) * signal detection. So this should be done before e1000_setup_pcs_link() * or e1000_phy_setup() is called. */ - if(hw->mac_type == e1000_82543) { + if (hw->mac_type == e1000_82543) { + ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, + 1, &eeprom_data); + if (ret_val) { + DEBUGOUT("EEPROM Read Error\n"); + return -E1000_ERR_EEPROM; + } ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) << SWDPIO__EXT_SHIFT); E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); @@ -1308,6 +1354,154 @@ e1000_copper_link_igp_setup(struct e1000_hw *hw) return E1000_SUCCESS; } +/******************************************************************** +* Copper link setup for e1000_phy_gg82563 series. +* +* hw - Struct containing variables accessed by shared code +*********************************************************************/ +static int32_t +e1000_copper_link_ggp_setup(struct e1000_hw *hw) +{ + int32_t ret_val; + uint16_t phy_data; + uint32_t reg_data; + + DEBUGFUNC("e1000_copper_link_ggp_setup"); + + if(!hw->phy_reset_disable) { + + /* Enable CRS on TX for half-duplex operation. */ + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, + &phy_data); + if(ret_val) + return ret_val; + + phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; + /* Use 25MHz for both link down and 1000BASE-T for Tx clock */ + phy_data |= GG82563_MSCR_TX_CLK_1000MBPS_25MHZ; + + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, + phy_data); + if(ret_val) + return ret_val; + + /* Options: + * MDI/MDI-X = 0 (default) + * 0 - Auto for all speeds + * 1 - MDI mode + * 2 - MDI-X mode + * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) + */ + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL, &phy_data); + if(ret_val) + return ret_val; + + phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; + + switch (hw->mdix) { + case 1: + phy_data |= GG82563_PSCR_CROSSOVER_MODE_MDI; + break; + case 2: + phy_data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; + break; + case 0: + default: + phy_data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; + break; + } + + /* Options: + * disable_polarity_correction = 0 (default) + * Automatic Correction for Reversed Cable Polarity + * 0 - Disabled + * 1 - Enabled + */ + phy_data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; + if(hw->disable_polarity_correction == 1) + phy_data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); + + if(ret_val) + return ret_val; + + /* SW Reset the PHY so all changes take effect */ + ret_val = e1000_phy_reset(hw); + if (ret_val) { + DEBUGOUT("Error Resetting the PHY\n"); + return ret_val; + } + } /* phy_reset_disable */ + + if (hw->mac_type == e1000_80003es2lan) { + /* Bypass RX and TX FIFO's */ + ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL, + E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS | + E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS); + if (ret_val) + return ret_val; + + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, &phy_data); + if (ret_val) + return ret_val; + + phy_data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, phy_data); + + if (ret_val) + return ret_val; + + reg_data = E1000_READ_REG(hw, CTRL_EXT); + reg_data &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); + E1000_WRITE_REG(hw, CTRL_EXT, reg_data); + + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, + &phy_data); + if (ret_val) + return ret_val; + + /* Do not init these registers when the HW is in IAMT mode, since the + * firmware will have already initialized them. We only initialize + * them if the HW is not in IAMT mode. + */ + if (e1000_check_mng_mode(hw) == FALSE) { + /* Enable Electrical Idle on the PHY */ + phy_data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, + phy_data); + if (ret_val) + return ret_val; + + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, + &phy_data); + if (ret_val) + return ret_val; + + /* Enable Pass False Carrier on the PHY */ + phy_data |= GG82563_KMCR_PASS_FALSE_CARRIER; + + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, + phy_data); + if (ret_val) + return ret_val; + } + + /* Workaround: Disable padding in Kumeran interface in the MAC + * and in the PHY to avoid CRC errors. + */ + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_INBAND_CTRL, + &phy_data); + if (ret_val) + return ret_val; + phy_data |= GG82563_ICR_DIS_PADDING; + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_INBAND_CTRL, + phy_data); + if (ret_val) + return ret_val; + } + + return E1000_SUCCESS; +} /******************************************************************** * Copper link setup for e1000_phy_m88 series. @@ -1518,6 +1712,7 @@ e1000_setup_copper_link(struct e1000_hw *hw) int32_t ret_val; uint16_t i; uint16_t phy_data; + uint16_t reg_data; DEBUGFUNC("e1000_setup_copper_link"); @@ -1526,6 +1721,22 @@ e1000_setup_copper_link(struct e1000_hw *hw) if(ret_val) return ret_val; + switch (hw->mac_type) { + case e1000_80003es2lan: + ret_val = e1000_read_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_INB_CTRL, + ®_data); + if (ret_val) + return ret_val; + reg_data |= E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING; + ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_INB_CTRL, + reg_data); + if (ret_val) + return ret_val; + break; + default: + break; + } + if (hw->phy_type == e1000_phy_igp || hw->phy_type == e1000_phy_igp_2) { ret_val = e1000_copper_link_igp_setup(hw); @@ -1535,6 +1746,10 @@ e1000_setup_copper_link(struct e1000_hw *hw) ret_val = e1000_copper_link_mgp_setup(hw); if(ret_val) return ret_val; + } else if (hw->phy_type == e1000_phy_gg82563) { + ret_val = e1000_copper_link_ggp_setup(hw); + if(ret_val) + return ret_val; } if(hw->autoneg) { @@ -1582,6 +1797,59 @@ e1000_setup_copper_link(struct e1000_hw *hw) } /****************************************************************************** +* Configure the MAC-to-PHY interface for 10/100Mbps +* +* hw - Struct containing variables accessed by shared code +******************************************************************************/ +static int32_t +e1000_configure_kmrn_for_10_100(struct e1000_hw *hw) +{ + int32_t ret_val = E1000_SUCCESS; + uint32_t tipg; + uint16_t reg_data; + + DEBUGFUNC("e1000_configure_kmrn_for_10_100"); + + reg_data = E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT; + ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_HD_CTRL, + reg_data); + if (ret_val) + return ret_val; + + /* Configure Transmit Inter-Packet Gap */ + tipg = E1000_READ_REG(hw, TIPG); + tipg &= ~E1000_TIPG_IPGT_MASK; + tipg |= DEFAULT_80003ES2LAN_TIPG_IPGT_10_100; + E1000_WRITE_REG(hw, TIPG, tipg); + + return ret_val; +} + +static int32_t +e1000_configure_kmrn_for_1000(struct e1000_hw *hw) +{ + int32_t ret_val = E1000_SUCCESS; + uint16_t reg_data; + uint32_t tipg; + + DEBUGFUNC("e1000_configure_kmrn_for_1000"); + + reg_data = E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT; + ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_HD_CTRL, + reg_data); + if (ret_val) + return ret_val; + + /* Configure Transmit Inter-Packet Gap */ + tipg = E1000_READ_REG(hw, TIPG); + tipg &= ~E1000_TIPG_IPGT_MASK; + tipg |= DEFAULT_80003ES2LAN_TIPG_IPGT_1000; + E1000_WRITE_REG(hw, TIPG, tipg); + + return ret_val; +} + +/****************************************************************************** * Configures PHY autoneg and flow control advertisement settings * * hw - Struct containing variables accessed by shared code @@ -1802,7 +2070,8 @@ e1000_phy_force_speed_duplex(struct e1000_hw *hw) /* Write the configured values back to the Device Control Reg. */ E1000_WRITE_REG(hw, CTRL, ctrl); - if (hw->phy_type == e1000_phy_m88) { + if ((hw->phy_type == e1000_phy_m88) || + (hw->phy_type == e1000_phy_gg82563)) { ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if(ret_val) return ret_val; @@ -1871,7 +2140,8 @@ e1000_phy_force_speed_duplex(struct e1000_hw *hw) msec_delay(100); } if((i == 0) && - (hw->phy_type == e1000_phy_m88)) { + ((hw->phy_type == e1000_phy_m88) || + (hw->phy_type == e1000_phy_gg82563))) { /* We didn't get link. Reset the DSP and wait again for link. */ ret_val = e1000_phy_reset_dsp(hw); if(ret_val) { @@ -1930,6 +2200,27 @@ e1000_phy_force_speed_duplex(struct e1000_hw *hw) if(ret_val) return ret_val; } + } else if (hw->phy_type == e1000_phy_gg82563) { + /* The TX_CLK of the Extended PHY Specific Control Register defaults + * to 2.5MHz on a reset. We need to re-force it back to 25MHz, if + * we're not in a forced 10/duplex configuration. */ + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data &= ~GG82563_MSCR_TX_CLK_MASK; + if ((hw->forced_speed_duplex == e1000_10_full) || + (hw->forced_speed_duplex == e1000_10_half)) + phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5MHZ; + else + phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25MHZ; + + /* Also due to the reset, we need to enable CRS on Tx. */ + phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; + + ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; } return E1000_SUCCESS; } @@ -2592,6 +2883,16 @@ e1000_get_speed_and_duplex(struct e1000_hw *hw, } } + if ((hw->mac_type == e1000_80003es2lan) && + (hw->media_type == e1000_media_type_copper)) { + if (*speed == SPEED_1000) + ret_val = e1000_configure_kmrn_for_1000(hw); + else + ret_val = e1000_configure_kmrn_for_10_100(hw); + if (ret_val) + return ret_val; + } + return E1000_SUCCESS; } @@ -2767,6 +3068,72 @@ e1000_shift_in_mdi_bits(struct e1000_hw *hw) return data; } +int32_t +e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask) +{ + uint32_t swfw_sync = 0; + uint32_t swmask = mask; + uint32_t fwmask = mask << 16; + int32_t timeout = 200; + + DEBUGFUNC("e1000_swfw_sync_acquire"); + + if (!hw->swfw_sync_present) + return e1000_get_hw_eeprom_semaphore(hw); + + while(timeout) { + if (e1000_get_hw_eeprom_semaphore(hw)) + return -E1000_ERR_SWFW_SYNC; + + swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) { + break; + } + + /* firmware currently using resource (fwmask) */ + /* or other software thread currently using resource (swmask) */ + e1000_put_hw_eeprom_semaphore(hw); + msec_delay_irq(5); + timeout--; + } + + if (!timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + return -E1000_ERR_SWFW_SYNC; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync); + + e1000_put_hw_eeprom_semaphore(hw); + return E1000_SUCCESS; +} + +void +e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask) +{ + uint32_t swfw_sync; + uint32_t swmask = mask; + + DEBUGFUNC("e1000_swfw_sync_release"); + + if (!hw->swfw_sync_present) { + e1000_put_hw_eeprom_semaphore(hw); + return; + } + + /* if (e1000_get_hw_eeprom_semaphore(hw)) + * return -E1000_ERR_SWFW_SYNC; */ + while (e1000_get_hw_eeprom_semaphore(hw) != E1000_SUCCESS); + /* empty */ + + swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC); + swfw_sync &= ~swmask; + E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync); + + e1000_put_hw_eeprom_semaphore(hw); +} + /***************************************************************************** * Reads the value from a PHY register, if the value is on a specific non zero * page, sets the page first. @@ -2779,22 +3146,55 @@ e1000_read_phy_reg(struct e1000_hw *hw, uint16_t *phy_data) { uint32_t ret_val; + uint16_t swfw; DEBUGFUNC("e1000_read_phy_reg"); + if ((hw->mac_type == e1000_80003es2lan) && + (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { + swfw = E1000_SWFW_PHY1_SM; + } else { + swfw = E1000_SWFW_PHY0_SM; + } + if (e1000_swfw_sync_acquire(hw, swfw)) + return -E1000_ERR_SWFW_SYNC; + if((hw->phy_type == e1000_phy_igp || hw->phy_type == e1000_phy_igp_2) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (uint16_t)reg_addr); if(ret_val) { + e1000_swfw_sync_release(hw, swfw); return ret_val; } + } else if (hw->phy_type == e1000_phy_gg82563) { + if (((reg_addr & MAX_PHY_REG_ADDRESS) > MAX_PHY_MULTI_PAGE_REG) || + (hw->mac_type == e1000_80003es2lan)) { + /* Select Configuration Page */ + if ((reg_addr & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { + ret_val = e1000_write_phy_reg_ex(hw, GG82563_PHY_PAGE_SELECT, + (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); + } else { + /* Use Alternative Page Select register to access + * registers 30 and 31 + */ + ret_val = e1000_write_phy_reg_ex(hw, + GG82563_PHY_PAGE_SELECT_ALT, + (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); + } + + if (ret_val) { + e1000_swfw_sync_release(hw, swfw); + return ret_val; + } + } } ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); + e1000_swfw_sync_release(hw, swfw); return ret_val; } @@ -2885,22 +3285,55 @@ e1000_write_phy_reg(struct e1000_hw *hw, uint16_t phy_data) { uint32_t ret_val; + uint16_t swfw; DEBUGFUNC("e1000_write_phy_reg"); + if ((hw->mac_type == e1000_80003es2lan) && + (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { + swfw = E1000_SWFW_PHY1_SM; + } else { + swfw = E1000_SWFW_PHY0_SM; + } + if (e1000_swfw_sync_acquire(hw, swfw)) + return -E1000_ERR_SWFW_SYNC; + if((hw->phy_type == e1000_phy_igp || hw->phy_type == e1000_phy_igp_2) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (uint16_t)reg_addr); if(ret_val) { + e1000_swfw_sync_release(hw, swfw); return ret_val; } + } else if (hw->phy_type == e1000_phy_gg82563) { + if (((reg_addr & MAX_PHY_REG_ADDRESS) > MAX_PHY_MULTI_PAGE_REG) || + (hw->mac_type == e1000_80003es2lan)) { + /* Select Configuration Page */ + if ((reg_addr & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { + ret_val = e1000_write_phy_reg_ex(hw, GG82563_PHY_PAGE_SELECT, + (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); + } else { + /* Use Alternative Page Select register to access + * registers 30 and 31 + */ + ret_val = e1000_write_phy_reg_ex(hw, + GG82563_PHY_PAGE_SELECT_ALT, + (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); + } + + if (ret_val) { + e1000_swfw_sync_release(hw, swfw); + return ret_val; + } + } } ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); + e1000_swfw_sync_release(hw, swfw); return ret_val; } @@ -2967,6 +3400,65 @@ e1000_write_phy_reg_ex(struct e1000_hw *hw, return E1000_SUCCESS; } +int32_t +e1000_read_kmrn_reg(struct e1000_hw *hw, + uint32_t reg_addr, + uint16_t *data) +{ + uint32_t reg_val; + uint16_t swfw; + DEBUGFUNC("e1000_read_kmrn_reg"); + + if ((hw->mac_type == e1000_80003es2lan) && + (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { + swfw = E1000_SWFW_PHY1_SM; + } else { + swfw = E1000_SWFW_PHY0_SM; + } + if (e1000_swfw_sync_acquire(hw, swfw)) + return -E1000_ERR_SWFW_SYNC; + + /* Write register address */ + reg_val = ((reg_addr << E1000_KUMCTRLSTA_OFFSET_SHIFT) & + E1000_KUMCTRLSTA_OFFSET) | + E1000_KUMCTRLSTA_REN; + E1000_WRITE_REG(hw, KUMCTRLSTA, reg_val); + udelay(2); + + /* Read the data returned */ + reg_val = E1000_READ_REG(hw, KUMCTRLSTA); + *data = (uint16_t)reg_val; + + e1000_swfw_sync_release(hw, swfw); + return E1000_SUCCESS; +} + +int32_t +e1000_write_kmrn_reg(struct e1000_hw *hw, + uint32_t reg_addr, + uint16_t data) +{ + uint32_t reg_val; + uint16_t swfw; + DEBUGFUNC("e1000_write_kmrn_reg"); + + if ((hw->mac_type == e1000_80003es2lan) && + (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { + swfw = E1000_SWFW_PHY1_SM; + } else { + swfw = E1000_SWFW_PHY0_SM; + } + if (e1000_swfw_sync_acquire(hw, swfw)) + return -E1000_ERR_SWFW_SYNC; + + reg_val = ((reg_addr << E1000_KUMCTRLSTA_OFFSET_SHIFT) & + E1000_KUMCTRLSTA_OFFSET) | data; + E1000_WRITE_REG(hw, KUMCTRLSTA, reg_val); + udelay(2); + + e1000_swfw_sync_release(hw, swfw); + return E1000_SUCCESS; +} /****************************************************************************** * Returns the PHY to the power-on reset state @@ -2979,6 +3471,7 @@ e1000_phy_hw_reset(struct e1000_hw *hw) uint32_t ctrl, ctrl_ext; uint32_t led_ctrl; int32_t ret_val; + uint16_t swfw; DEBUGFUNC("e1000_phy_hw_reset"); @@ -2991,11 +3484,21 @@ e1000_phy_hw_reset(struct e1000_hw *hw) DEBUGOUT("Resetting Phy...\n"); if(hw->mac_type > e1000_82543) { + if ((hw->mac_type == e1000_80003es2lan) && + (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { + swfw = E1000_SWFW_PHY1_SM; + } else { + swfw = E1000_SWFW_PHY0_SM; + } + if (e1000_swfw_sync_acquire(hw, swfw)) { + e1000_release_software_semaphore(hw); + return -E1000_ERR_SWFW_SYNC; + } /* Read the device control register and assert the E1000_CTRL_PHY_RST * bit. Then, take it out of reset. * For pre-e1000_82571 hardware, we delay for 10ms between the assert * and deassert. For e1000_82571 hardware and later, we instead delay - * for 10ms after the deassertion. + * for 50us between and 10ms after the deassertion. */ ctrl = E1000_READ_REG(hw, CTRL); E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PHY_RST); @@ -3011,6 +3514,7 @@ e1000_phy_hw_reset(struct e1000_hw *hw) if (hw->mac_type >= e1000_82571) msec_delay(10); + e1000_swfw_sync_release(hw, swfw); } else { /* Read the Extended Device Control Register, assert the PHY_RESET_DIR * bit to put the PHY into reset. Then, take it out of reset. @@ -3037,6 +3541,7 @@ e1000_phy_hw_reset(struct e1000_hw *hw) /* Wait for FW to finish PHY configuration. */ ret_val = e1000_get_phy_cfg_done(hw); + e1000_release_software_semaphore(hw); return ret_val; } @@ -3114,6 +3619,15 @@ e1000_detect_gig_phy(struct e1000_hw *hw) return E1000_SUCCESS; } + /* ESB-2 PHY reads require e1000_phy_gg82563 to be set because of a work- + * around that forces PHY page 0 to be set or the reads fail. The rest of + * the code in this routine uses e1000_read_phy_reg to read the PHY ID. + * So for ESB-2 we need to have this set so our reads won't fail. If the + * attached PHY is not a e1000_phy_gg82563, the routines below will figure + * this out as well. */ + if (hw->mac_type == e1000_80003es2lan) + hw->phy_type = e1000_phy_gg82563; + /* Read the PHY ID Registers to identify which PHY is onboard. */ ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high); if(ret_val) @@ -3151,6 +3665,9 @@ e1000_detect_gig_phy(struct e1000_hw *hw) case e1000_82573: if(hw->phy_id == M88E1111_I_PHY_ID) match = TRUE; break; + case e1000_80003es2lan: + if (hw->phy_id == GG82563_E_PHY_ID) match = TRUE; + break; default: DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type); return -E1000_ERR_CONFIG; @@ -3177,8 +3694,10 @@ e1000_phy_reset_dsp(struct e1000_hw *hw) DEBUGFUNC("e1000_phy_reset_dsp"); do { - ret_val = e1000_write_phy_reg(hw, 29, 0x001d); - if(ret_val) break; + if (hw->phy_type != e1000_phy_gg82563) { + ret_val = e1000_write_phy_reg(hw, 29, 0x001d); + if(ret_val) break; + } ret_val = e1000_write_phy_reg(hw, 30, 0x00c1); if(ret_val) break; ret_val = e1000_write_phy_reg(hw, 30, 0x0000); @@ -3310,8 +3829,17 @@ e1000_phy_m88_get_info(struct e1000_hw *hw, /* Cable Length Estimation and Local/Remote Receiver Information * are only valid at 1000 Mbps. */ - phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT); + if (hw->phy_type != e1000_phy_gg82563) { + phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT); + } else { + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, + &phy_data); + if (ret_val) + return ret_val; + + phy_info->cable_length = phy_data & GG82563_DSPD_CABLE_LENGTH; + } ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); if(ret_val) @@ -3392,7 +3920,8 @@ e1000_validate_mdi_setting(struct e1000_hw *hw) /****************************************************************************** * Sets up eeprom variables in the hw struct. Must be called after mac_type - * is configured. + * is configured. Additionally, if this is ICH8, the flash controller GbE + * registers must be mapped, or this will crash. * * hw - Struct containing variables accessed by shared code *****************************************************************************/ @@ -3505,6 +4034,20 @@ e1000_init_eeprom_params(struct e1000_hw *hw) E1000_WRITE_REG(hw, EECD, eecd); } break; + case e1000_80003es2lan: + eeprom->type = e1000_eeprom_spi; + eeprom->opcode_bits = 8; + eeprom->delay_usec = 1; + if (eecd & E1000_EECD_ADDR_BITS) { + eeprom->page_size = 32; + eeprom->address_bits = 16; + } else { + eeprom->page_size = 8; + eeprom->address_bits = 8; + } + eeprom->use_eerd = TRUE; + eeprom->use_eewr = FALSE; + break; default: break; } @@ -3685,9 +4228,8 @@ e1000_acquire_eeprom(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_eeprom"); - if(e1000_get_hw_eeprom_semaphore(hw)) - return -E1000_ERR_EEPROM; - + if (e1000_swfw_sync_acquire(hw, E1000_SWFW_EEP_SM)) + return -E1000_ERR_SWFW_SYNC; eecd = E1000_READ_REG(hw, EECD); if (hw->mac_type != e1000_82573) { @@ -3706,7 +4248,7 @@ e1000_acquire_eeprom(struct e1000_hw *hw) eecd &= ~E1000_EECD_REQ; E1000_WRITE_REG(hw, EECD, eecd); DEBUGOUT("Could not acquire EEPROM grant\n"); - e1000_put_hw_eeprom_semaphore(hw); + e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); return -E1000_ERR_EEPROM; } } @@ -3829,7 +4371,7 @@ e1000_release_eeprom(struct e1000_hw *hw) E1000_WRITE_REG(hw, EECD, eecd); } - e1000_put_hw_eeprom_semaphore(hw); + e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); } /****************************************************************************** @@ -3908,6 +4450,8 @@ e1000_read_eeprom(struct e1000_hw *hw, if (e1000_is_onboard_nvm_eeprom(hw) == TRUE && hw->eeprom.use_eerd == FALSE) { switch (hw->mac_type) { + case e1000_80003es2lan: + break; default: /* Prepare the EEPROM for reading */ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) @@ -4025,6 +4569,9 @@ e1000_write_eeprom_eewr(struct e1000_hw *hw, uint32_t i = 0; int32_t error = 0; + if (e1000_swfw_sync_acquire(hw, E1000_SWFW_EEP_SM)) + return -E1000_ERR_SWFW_SYNC; + for (i = 0; i < words; i++) { register_value = (data[i] << E1000_EEPROM_RW_REG_DATA) | ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) | @@ -4044,6 +4591,7 @@ e1000_write_eeprom_eewr(struct e1000_hw *hw, } } + e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); return error; } @@ -4085,6 +4633,8 @@ e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw) { uint32_t eecd = 0; + DEBUGFUNC("e1000_is_onboard_nvm_eeprom"); + if(hw->mac_type == e1000_82573) { eecd = E1000_READ_REG(hw, EECD); @@ -4511,6 +5061,7 @@ e1000_read_mac_addr(struct e1000_hw * hw) case e1000_82546: case e1000_82546_rev_3: case e1000_82571: + case e1000_80003es2lan: if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) hw->perm_mac_addr[5] ^= 0x01; break; @@ -4749,8 +5300,37 @@ e1000_rar_set(struct e1000_hw *hw, rar_low = ((uint32_t) addr[0] | ((uint32_t) addr[1] << 8) | ((uint32_t) addr[2] << 16) | ((uint32_t) addr[3] << 24)); + rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8)); - rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8) | E1000_RAH_AV); + /* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx + * unit hang. + * + * Description: + * If there are any Rx frames queued up or otherwise present in the HW + * before RSS is enabled, and then we enable RSS, the HW Rx unit will + * hang. To work around this issue, we have to disable receives and + * flush out all Rx frames before we enable RSS. To do so, we modify we + * redirect all Rx traffic to manageability and then reset the HW. + * This flushes away Rx frames, and (since the redirections to + * manageability persists across resets) keeps new ones from coming in + * while we work. Then, we clear the Address Valid AV bit for all MAC + * addresses and undo the re-direction to manageability. + * Now, frames are coming in again, but the MAC won't accept them, so + * far so good. We now proceed to initialize RSS (if necessary) and + * configure the Rx unit. Last, we re-enable the AV bits and continue + * on our merry way. + */ + switch (hw->mac_type) { + case e1000_82571: + case e1000_82572: + case e1000_80003es2lan: + if (hw->leave_av_bit_off == TRUE) + break; + default: + /* Indicate to hardware the Address is Valid. */ + rar_high |= E1000_RAH_AV; + break; + } E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low); E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high); @@ -5330,6 +5910,7 @@ e1000_get_bus_info(struct e1000_hw *hw) hw->bus_width = e1000_bus_width_pciex_1; break; case e1000_82571: + case e1000_80003es2lan: hw->bus_type = e1000_bus_type_pci_express; hw->bus_speed = e1000_bus_speed_2500; hw->bus_width = e1000_bus_width_pciex_4; @@ -5475,6 +6056,34 @@ e1000_get_cable_length(struct e1000_hw *hw, return -E1000_ERR_PHY; break; } + } else if (hw->phy_type == e1000_phy_gg82563) { + ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, + &phy_data); + if (ret_val) + return ret_val; + cable_length = phy_data & GG82563_DSPD_CABLE_LENGTH; + + switch (cable_length) { + case e1000_gg_cable_length_60: + *min_length = 0; + *max_length = e1000_igp_cable_length_60; + break; + case e1000_gg_cable_length_60_115: + *min_length = e1000_igp_cable_length_60; + *max_length = e1000_igp_cable_length_115; + break; + case e1000_gg_cable_length_115_150: + *min_length = e1000_igp_cable_length_115; + *max_length = e1000_igp_cable_length_150; + break; + case e1000_gg_cable_length_150: + *min_length = e1000_igp_cable_length_150; + *max_length = e1000_igp_cable_length_180; + break; + default: + return -E1000_ERR_PHY; + break; + } } else if(hw->phy_type == e1000_phy_igp) { /* For IGP PHY */ uint16_t agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = {IGP01E1000_PHY_AGC_A, @@ -5584,7 +6193,8 @@ e1000_check_polarity(struct e1000_hw *hw, DEBUGFUNC("e1000_check_polarity"); - if(hw->phy_type == e1000_phy_m88) { + if ((hw->phy_type == e1000_phy_m88) || + (hw->phy_type == e1000_phy_gg82563)) { /* return the Polarity bit in the Status register. */ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); @@ -5653,7 +6263,8 @@ e1000_check_downshift(struct e1000_hw *hw) return ret_val; hw->speed_downgraded = (phy_data & IGP01E1000_PLHR_SS_DOWNGRADE) ? 1 : 0; - } else if(hw->phy_type == e1000_phy_m88) { + } else if ((hw->phy_type == e1000_phy_m88) || + (hw->phy_type == e1000_phy_gg82563)) { ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if(ret_val) @@ -6686,6 +7297,7 @@ e1000_get_auto_rd_done(struct e1000_hw *hw) case e1000_82571: case e1000_82572: case e1000_82573: + case e1000_80003es2lan: while(timeout) { if (E1000_READ_REG(hw, EECD) & E1000_EECD_AUTO_RD) break; else msec_delay(1); @@ -6729,6 +7341,11 @@ e1000_get_phy_cfg_done(struct e1000_hw *hw) default: msec_delay(10); break; + case e1000_80003es2lan: + /* Separate *_CFG_DONE_* bit for each port */ + if (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) + cfg_mask = E1000_EEPROM_CFG_DONE_PORT_1; + /* Fall Through */ case e1000_82571: case e1000_82572: while (timeout) { @@ -6746,12 +7363,6 @@ e1000_get_phy_cfg_done(struct e1000_hw *hw) break; } - /* PHY configuration from NVM just starts after EECD_AUTO_RD sets to high. - * Need to wait for PHY configuration completion before accessing NVM - * and PHY. */ - if (hw->mac_type == e1000_82573) - msec_delay(25); - return E1000_SUCCESS; } @@ -6777,6 +7388,11 @@ e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw) if(!hw->eeprom_semaphore_present) return E1000_SUCCESS; + if (hw->mac_type == e1000_80003es2lan) { + /* Get the SW semaphore. */ + if (e1000_get_software_semaphore(hw) != E1000_SUCCESS) + return -E1000_ERR_EEPROM; + } /* Get the FW semaphore. */ timeout = hw->eeprom.word_size + 1; @@ -6822,10 +7438,75 @@ e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw) return; swsm = E1000_READ_REG(hw, SWSM); + if (hw->mac_type == e1000_80003es2lan) { + /* Release both semaphores. */ + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + } else swsm &= ~(E1000_SWSM_SWESMBI); E1000_WRITE_REG(hw, SWSM, swsm); } +/*************************************************************************** + * + * Obtaining software semaphore bit (SMBI) before resetting PHY. + * + * hw: Struct containing variables accessed by shared code + * + * returns: - E1000_ERR_RESET if fail to obtain semaphore. + * E1000_SUCCESS at any other case. + * + ***************************************************************************/ +int32_t +e1000_get_software_semaphore(struct e1000_hw *hw) +{ + int32_t timeout = hw->eeprom.word_size + 1; + uint32_t swsm; + + DEBUGFUNC("e1000_get_software_semaphore"); + + if (hw->mac_type != e1000_80003es2lan) + return E1000_SUCCESS; + + while(timeout) { + swsm = E1000_READ_REG(hw, SWSM); + /* If SMBI bit cleared, it is now set and we hold the semaphore */ + if(!(swsm & E1000_SWSM_SMBI)) + break; + msec_delay_irq(1); + timeout--; + } + + if(!timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_RESET; + } + + return E1000_SUCCESS; +} + +/*************************************************************************** + * + * Release semaphore bit (SMBI). + * + * hw: Struct containing variables accessed by shared code + * + ***************************************************************************/ +void +e1000_release_software_semaphore(struct e1000_hw *hw) +{ + uint32_t swsm; + + DEBUGFUNC("e1000_release_software_semaphore"); + + if (hw->mac_type != e1000_80003es2lan) + return; + + swsm = E1000_READ_REG(hw, SWSM); + /* Release the SW semaphores.*/ + swsm &= ~E1000_SWSM_SMBI; + E1000_WRITE_REG(hw, SWSM, swsm); +} + /****************************************************************************** * Checks if PHY reset is blocked due to SOL/IDER session, for example. * Returning E1000_BLK_PHY_RESET isn't necessarily an error. But it's up to @@ -6862,6 +7543,7 @@ e1000_arc_subsystem_valid(struct e1000_hw *hw) case e1000_82571: case e1000_82572: case e1000_82573: + case e1000_80003es2lan: fwsm = E1000_READ_REG(hw, FWSM); if((fwsm & E1000_FWSM_MODE_MASK) != 0) return TRUE; diff --git a/drivers/net/e1000/e1000_hw.h b/drivers/net/e1000/e1000_hw.h index f1219dd..150e45e 100644 --- a/drivers/net/e1000/e1000_hw.h +++ b/drivers/net/e1000/e1000_hw.h @@ -60,6 +60,7 @@ typedef enum { e1000_82571, e1000_82572, e1000_82573, + e1000_80003es2lan, e1000_num_macs } e1000_mac_type; @@ -139,6 +140,13 @@ typedef enum { } e1000_cable_length; typedef enum { + e1000_gg_cable_length_60 = 0, + e1000_gg_cable_length_60_115 = 1, + e1000_gg_cable_length_115_150 = 2, + e1000_gg_cable_length_150 = 4 +} e1000_gg_cable_length; + +typedef enum { e1000_igp_cable_length_10 = 10, e1000_igp_cable_length_20 = 20, e1000_igp_cable_length_30 = 30, @@ -208,6 +216,7 @@ typedef enum { e1000_phy_m88 = 0, e1000_phy_igp, e1000_phy_igp_2, + e1000_phy_gg82563, e1000_phy_undefined = 0xFF } e1000_phy_type; @@ -281,6 +290,7 @@ typedef enum { #define E1000_ERR_MASTER_REQUESTS_PENDING 10 #define E1000_ERR_HOST_INTERFACE_COMMAND 11 #define E1000_BLK_PHY_RESET 12 +#define E1000_ERR_SWFW_SYNC 13 /* Function prototypes */ /* Initialization */ @@ -304,6 +314,8 @@ int32_t e1000_phy_hw_reset(struct e1000_hw *hw); int32_t e1000_phy_reset(struct e1000_hw *hw); int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); int32_t e1000_validate_mdi_setting(struct e1000_hw *hw); +int32_t e1000_read_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *data); +int32_t e1000_write_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data); /* EEPROM Functions */ int32_t e1000_init_eeprom_params(struct e1000_hw *hw); @@ -454,6 +466,8 @@ int32_t e1000_check_phy_reset_block(struct e1000_hw *hw); #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 +#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 +#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 #define NODE_ADDRESS_SIZE 6 @@ -850,6 +864,7 @@ struct e1000_ffvt_entry { #define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ #define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ #define E1000_TCTL 0x00400 /* TX Control - RW */ +#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ #define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ #define E1000_TBT 0x00448 /* TX Burst Timer - RW */ #define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ @@ -996,6 +1011,11 @@ struct e1000_ffvt_entry { #define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ #define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ +#define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ +#define E1000_MDPHYA 0x0003C /* PHY address - RW */ +#define E1000_MANC2H 0x05860 /* Managment Control To Host - RW */ +#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ + #define E1000_GCR 0x05B00 /* PCI-Ex Control */ #define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ #define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ @@ -1065,6 +1085,7 @@ struct e1000_ffvt_entry { #define E1000_82542_RXCW E1000_RXCW #define E1000_82542_MTA 0x00200 #define E1000_82542_TCTL E1000_TCTL +#define E1000_82542_TCTL_EXT E1000_TCTL_EXT #define E1000_82542_TIPG E1000_TIPG #define E1000_82542_TDBAL 0x00420 #define E1000_82542_TDBAH 0x00424 @@ -1212,6 +1233,8 @@ struct e1000_ffvt_entry { #define E1000_82542_RSSRK E1000_RSSRK #define E1000_82542_RSSIM E1000_RSSIM #define E1000_82542_RSSIR E1000_RSSIR +#define E1000_82542_KUMCTRLSTA E1000_KUMCTRLSTA +#define E1000_82542_SW_FW_SYNC E1000_SW_FW_SYNC /* Statistics counters collected by the MAC */ struct e1000_hw_stats { @@ -1303,6 +1326,7 @@ struct e1000_hw { e1000_ffe_config ffe_config_state; uint32_t asf_firmware_present; uint32_t eeprom_semaphore_present; + uint32_t swfw_sync_present; unsigned long io_base; uint32_t phy_id; uint32_t phy_revision; @@ -1361,6 +1385,7 @@ struct e1000_hw { boolean_t ifs_params_forced; boolean_t in_ifs_mode; boolean_t mng_reg_access_disabled; + boolean_t leave_av_bit_off; }; @@ -1393,6 +1418,8 @@ struct e1000_hw { #define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ #define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ #define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ +#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ +#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ @@ -1429,6 +1456,16 @@ struct e1000_hw { #define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ #define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ #define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ +#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ +#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ +#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ +#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ +#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ +#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ +#define E1000_STATUS_FUSE_8 0x04000000 +#define E1000_STATUS_FUSE_9 0x08000000 +#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ +#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ /* Constants used to intrepret the masked PCI-X bus speed. */ #define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */ @@ -1506,6 +1543,8 @@ struct e1000_hw { #define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 #define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 #define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_KMRN 0x00000000 +#define E1000_CTRL_EXT_LINK_MODE_SERDES 0x00C00000 #define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000 #define E1000_CTRL_EXT_WR_WMARK_256 0x00000000 #define E1000_CTRL_EXT_WR_WMARK_320 0x01000000 @@ -1515,6 +1554,9 @@ struct e1000_hw { #define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ #define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ #define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ +#define E1000_CRTL_EXT_PB_PAREN 0x01000000 /* packet buffer parity error detection enabled */ +#define E1000_CTRL_EXT_DF_PAREN 0x02000000 /* descriptor FIFO parity error detection enable */ +#define E1000_CTRL_EXT_GHOST_PAREN 0x40000000 /* MDI Control */ #define E1000_MDIC_DATA_MASK 0x0000FFFF @@ -1528,6 +1570,32 @@ struct e1000_hw { #define E1000_MDIC_INT_EN 0x20000000 #define E1000_MDIC_ERROR 0x40000000 +#define E1000_KUMCTRLSTA_MASK 0x0000FFFF +#define E1000_KUMCTRLSTA_OFFSET 0x001F0000 +#define E1000_KUMCTRLSTA_OFFSET_SHIFT 16 +#define E1000_KUMCTRLSTA_REN 0x00200000 + +#define E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL 0x00000000 +#define E1000_KUMCTRLSTA_OFFSET_CTRL 0x00000001 +#define E1000_KUMCTRLSTA_OFFSET_INB_CTRL 0x00000002 +#define E1000_KUMCTRLSTA_OFFSET_DIAG 0x00000003 +#define E1000_KUMCTRLSTA_OFFSET_TIMEOUTS 0x00000004 +#define E1000_KUMCTRLSTA_OFFSET_INB_PARAM 0x00000009 +#define E1000_KUMCTRLSTA_OFFSET_HD_CTRL 0x00000010 +#define E1000_KUMCTRLSTA_OFFSET_M2P_SERDES 0x0000001E +#define E1000_KUMCTRLSTA_OFFSET_M2P_MODES 0x0000001F + +/* FIFO Control */ +#define E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS 0x00000008 +#define E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS 0x00000800 + +/* In-Band Control */ +#define E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING 0x00000010 + +/* Half-Duplex Control */ +#define E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT 0x00000004 +#define E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT 0x00000000 + /* LED Control */ #define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F #define E1000_LEDCTL_LED0_MODE_SHIFT 0 @@ -1590,6 +1658,13 @@ struct e1000_hw { #define E1000_ICR_MNG 0x00040000 /* Manageability event */ #define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ #define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ +#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ +#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ +#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ +#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ /* Interrupt Cause Set */ #define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ @@ -1610,6 +1685,12 @@ struct e1000_hw { #define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ #define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ #define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ +#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ +#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ +#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ /* Interrupt Mask Set */ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ @@ -1630,6 +1711,12 @@ struct e1000_hw { #define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ #define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ #define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ +#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ +#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ +#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ /* Interrupt Mask Clear */ #define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */ @@ -1650,6 +1737,12 @@ struct e1000_hw { #define E1000_IMC_ACK E1000_ICR_ACK /* Receive Ack frame */ #define E1000_IMC_MNG E1000_ICR_MNG /* Manageability event */ #define E1000_IMC_DOCK E1000_ICR_DOCK /* Dock/Undock */ +#define E1000_IMC_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ +#define E1000_IMC_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ +#define E1000_IMC_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ +#define E1000_IMC_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ +#define E1000_IMC_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ +#define E1000_IMC_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ /* Receive Control */ #define E1000_RCTL_RST 0x00000001 /* Software reset */ @@ -1719,6 +1812,12 @@ struct e1000_hw { #define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ #define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ +/* SW_W_SYNC definitions */ +#define E1000_SWFW_EEP_SM 0x0001 +#define E1000_SWFW_PHY0_SM 0x0002 +#define E1000_SWFW_PHY1_SM 0x0004 +#define E1000_SWFW_MAC_CSR_SM 0x0008 + /* Receive Descriptor */ #define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */ #define E1000_RDT_FPDB 0x80000000 /* Flush descriptor block */ @@ -1797,6 +1896,11 @@ struct e1000_hw { #define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ #define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ #define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ +/* Extended Transmit Control */ +#define E1000_TCTL_EXT_BST_MASK 0x000003FF /* Backoff Slot Time */ +#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ + +#define DEFAULT_80003ES2LAN_TCTL_EXT_GCEX 0x00010000 /* Receive Checksum Control */ #define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */ @@ -1874,6 +1978,7 @@ struct e1000_hw { #define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ #define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ #define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ +#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ #define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ #define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MAC address * filtering */ @@ -1962,19 +2067,19 @@ struct e1000_host_command_info { /* PCI-Ex registers */ /* PCI-Ex Control Register */ -#define E1000_GCR_RXD_NO_SNOOP 0x00000001 -#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 -#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 -#define E1000_GCR_TXD_NO_SNOOP 0x00000008 -#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 -#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 - -#define PCI_EX_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ - E1000_GCR_RXDSCW_NO_SNOOP | \ - E1000_GCR_RXDSCR_NO_SNOOP | \ - E1000_GCR TXD_NO_SNOOP | \ - E1000_GCR_TXDSCW_NO_SNOOP | \ - E1000_GCR_TXDSCR_NO_SNOOP) +#define E1000_GCR_RXD_NO_SNOOP 0x00000001 +#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 +#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 +#define E1000_GCR_TXD_NO_SNOOP 0x00000008 +#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 +#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 + +#define PCI_EX_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ + E1000_GCR_RXDSCW_NO_SNOOP | \ + E1000_GCR_RXDSCR_NO_SNOOP | \ + E1000_GCR_TXD_NO_SNOOP | \ + E1000_GCR_TXDSCW_NO_SNOOP | \ + E1000_GCR_TXDSCR_NO_SNOOP) #define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 /* Function Active and Power State to MNG */ @@ -2035,12 +2140,14 @@ struct e1000_host_command_info { #define EEPROM_INIT_CONTROL1_REG 0x000A #define EEPROM_INIT_CONTROL2_REG 0x000F #define EEPROM_INIT_CONTROL3_PORT_B 0x0014 +#define EEPROM_INIT_3GIO_3 0x001A #define EEPROM_INIT_CONTROL3_PORT_A 0x0024 #define EEPROM_CFG 0x0012 #define EEPROM_FLASH_VERSION 0x0032 #define EEPROM_CHECKSUM_REG 0x003F #define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ +#define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ /* Word definitions for ID LED Settings */ #define ID_LED_RESERVED_0000 0x0000 @@ -2084,6 +2191,9 @@ struct e1000_host_command_info { #define EEPROM_WORD0F_ANE 0x0800 #define EEPROM_WORD0F_SWPDIO_EXT 0x00F0 +/* Mask bits for fields in Word 0x1a of the EEPROM */ +#define EEPROM_WORD1A_ASPM_MASK 0x000C + /* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ #define EEPROM_SUM 0xBABA @@ -2126,8 +2236,11 @@ struct e1000_host_command_info { #define DEFAULT_82542_TIPG_IPGR2 10 #define DEFAULT_82543_TIPG_IPGR2 6 +#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 #define E1000_TIPG_IPGR2_SHIFT 20 +#define DEFAULT_80003ES2LAN_TIPG_IPGT_10_100 0x00000009 +#define DEFAULT_80003ES2LAN_TIPG_IPGT_1000 0x00000008 #define E1000_TXDMAC_DPP 0x00000001 /* Adaptive IFS defines */ @@ -2368,6 +2481,78 @@ struct e1000_host_command_info { #define IGP01E1000_ANALOG_REGS_PAGE 0x20C0 +/* Bits... + * 15-5: page + * 4-0: register offset + */ +#define GG82563_PAGE_SHIFT 5 +#define GG82563_REG(page, reg) \ + (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) +#define GG82563_MIN_ALT_REG 30 + +/* GG82563 Specific Registers */ +#define GG82563_PHY_SPEC_CTRL \ + GG82563_REG(0, 16) /* PHY Specific Control */ +#define GG82563_PHY_SPEC_STATUS \ + GG82563_REG(0, 17) /* PHY Specific Status */ +#define GG82563_PHY_INT_ENABLE \ + GG82563_REG(0, 18) /* Interrupt Enable */ +#define GG82563_PHY_SPEC_STATUS_2 \ + GG82563_REG(0, 19) /* PHY Specific Status 2 */ +#define GG82563_PHY_RX_ERR_CNTR \ + GG82563_REG(0, 21) /* Receive Error Counter */ +#define GG82563_PHY_PAGE_SELECT \ + GG82563_REG(0, 22) /* Page Select */ +#define GG82563_PHY_SPEC_CTRL_2 \ + GG82563_REG(0, 26) /* PHY Specific Control 2 */ +#define GG82563_PHY_PAGE_SELECT_ALT \ + GG82563_REG(0, 29) /* Alternate Page Select */ +#define GG82563_PHY_TEST_CLK_CTRL \ + GG82563_REG(0, 30) /* Test Clock Control (use reg. 29 to select) */ + +#define GG82563_PHY_MAC_SPEC_CTRL \ + GG82563_REG(2, 21) /* MAC Specific Control Register */ +#define GG82563_PHY_MAC_SPEC_CTRL_2 \ + GG82563_REG(2, 26) /* MAC Specific Control 2 */ + +#define GG82563_PHY_DSP_DISTANCE \ + GG82563_REG(5, 26) /* DSP Distance */ + +/* Page 193 - Port Control Registers */ +#define GG82563_PHY_KMRN_MODE_CTRL \ + GG82563_REG(193, 16) /* Kumeran Mode Control */ +#define GG82563_PHY_PORT_RESET \ + GG82563_REG(193, 17) /* Port Reset */ +#define GG82563_PHY_REVISION_ID \ + GG82563_REG(193, 18) /* Revision ID */ +#define GG82563_PHY_DEVICE_ID \ + GG82563_REG(193, 19) /* Device ID */ +#define GG82563_PHY_PWR_MGMT_CTRL \ + GG82563_REG(193, 20) /* Power Management Control */ +#define GG82563_PHY_RATE_ADAPT_CTRL \ + GG82563_REG(193, 25) /* Rate Adaptation Control */ + +/* Page 194 - KMRN Registers */ +#define GG82563_PHY_KMRN_FIFO_CTRL_STAT \ + GG82563_REG(194, 16) /* FIFO's Control/Status */ +#define GG82563_PHY_KMRN_CTRL \ + GG82563_REG(194, 17) /* Control */ +#define GG82563_PHY_INBAND_CTRL \ + GG82563_REG(194, 18) /* Inband Control */ +#define GG82563_PHY_KMRN_DIAGNOSTIC \ + GG82563_REG(194, 19) /* Diagnostic */ +#define GG82563_PHY_ACK_TIMEOUTS \ + GG82563_REG(194, 20) /* Acknowledge Timeouts */ +#define GG82563_PHY_ADV_ABILITY \ + GG82563_REG(194, 21) /* Advertised Ability */ +#define GG82563_PHY_LINK_PARTNER_ADV_ABILITY \ + GG82563_REG(194, 23) /* Link Partner Advertised Ability */ +#define GG82563_PHY_ADV_NEXT_PAGE \ + GG82563_REG(194, 24) /* Advertised Next Page */ +#define GG82563_PHY_LINK_PARTNER_ADV_NEXT_PAGE \ + GG82563_REG(194, 25) /* Link Partner Advertised Next page */ +#define GG82563_PHY_KMRN_MISC \ + GG82563_REG(194, 26) /* Misc. */ /* PHY Control Register */ #define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ @@ -2681,6 +2866,113 @@ struct e1000_host_command_info { #define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080 #define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500 +/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ +#define GG82563_PSCR_DISABLE_JABBER 0x0001 /* 1=Disable Jabber */ +#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Polarity Reversal Disabled */ +#define GG82563_PSCR_POWER_DOWN 0x0004 /* 1=Power Down */ +#define GG82563_PSCR_COPPER_TRANSMITER_DISABLE 0x0008 /* 1=Transmitter Disabled */ +#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 +#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI configuration */ +#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX configuration */ +#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Automatic crossover */ +#define GG82563_PSCR_ENALBE_EXTENDED_DISTANCE 0x0080 /* 1=Enable Extended Distance */ +#define GG82563_PSCR_ENERGY_DETECT_MASK 0x0300 +#define GG82563_PSCR_ENERGY_DETECT_OFF 0x0000 /* 00,01=Off */ +#define GG82563_PSCR_ENERGY_DETECT_RX 0x0200 /* 10=Sense on Rx only (Energy Detect) */ +#define GG82563_PSCR_ENERGY_DETECT_RX_TM 0x0300 /* 11=Sense and Tx NLP */ +#define GG82563_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force Link Good */ +#define GG82563_PSCR_DOWNSHIFT_ENABLE 0x0800 /* 1=Enable Downshift */ +#define GG82563_PSCR_DOWNSHIFT_COUNTER_MASK 0x7000 +#define GG82563_PSCR_DOWNSHIFT_COUNTER_SHIFT 12 + +/* PHY Specific Status Register (Page 0, Register 17) */ +#define GG82563_PSSR_JABBER 0x0001 /* 1=Jabber */ +#define GG82563_PSSR_POLARITY 0x0002 /* 1=Polarity Reversed */ +#define GG82563_PSSR_LINK 0x0008 /* 1=Link is Up */ +#define GG82563_PSSR_ENERGY_DETECT 0x0010 /* 1=Sleep, 0=Active */ +#define GG82563_PSSR_DOWNSHIFT 0x0020 /* 1=Downshift */ +#define GG82563_PSSR_CROSSOVER_STATUS 0x0040 /* 1=MDIX, 0=MDI */ +#define GG82563_PSSR_RX_PAUSE_ENABLED 0x0100 /* 1=Receive Pause Enabled */ +#define GG82563_PSSR_TX_PAUSE_ENABLED 0x0200 /* 1=Transmit Pause Enabled */ +#define GG82563_PSSR_LINK_UP 0x0400 /* 1=Link Up */ +#define GG82563_PSSR_SPEED_DUPLEX_RESOLVED 0x0800 /* 1=Resolved */ +#define GG82563_PSSR_PAGE_RECEIVED 0x1000 /* 1=Page Received */ +#define GG82563_PSSR_DUPLEX 0x2000 /* 1-Full-Duplex */ +#define GG82563_PSSR_SPEED_MASK 0xC000 +#define GG82563_PSSR_SPEED_10MBPS 0x0000 /* 00=10Mbps */ +#define GG82563_PSSR_SPEED_100MBPS 0x4000 /* 01=100Mbps */ +#define GG82563_PSSR_SPEED_1000MBPS 0x8000 /* 10=1000Mbps */ + +/* PHY Specific Status Register 2 (Page 0, Register 19) */ +#define GG82563_PSSR2_JABBER 0x0001 /* 1=Jabber */ +#define GG82563_PSSR2_POLARITY_CHANGED 0x0002 /* 1=Polarity Changed */ +#define GG82563_PSSR2_ENERGY_DETECT_CHANGED 0x0010 /* 1=Energy Detect Changed */ +#define GG82563_PSSR2_DOWNSHIFT_INTERRUPT 0x0020 /* 1=Downshift Detected */ +#define GG82563_PSSR2_MDI_CROSSOVER_CHANGE 0x0040 /* 1=Crossover Changed */ +#define GG82563_PSSR2_FALSE_CARRIER 0x0100 /* 1=False Carrier */ +#define GG82563_PSSR2_SYMBOL_ERROR 0x0200 /* 1=Symbol Error */ +#define GG82563_PSSR2_LINK_STATUS_CHANGED 0x0400 /* 1=Link Status Changed */ +#define GG82563_PSSR2_AUTO_NEG_COMPLETED 0x0800 /* 1=Auto-Neg Completed */ +#define GG82563_PSSR2_PAGE_RECEIVED 0x1000 /* 1=Page Received */ +#define GG82563_PSSR2_DUPLEX_CHANGED 0x2000 /* 1=Duplex Changed */ +#define GG82563_PSSR2_SPEED_CHANGED 0x4000 /* 1=Speed Changed */ +#define GG82563_PSSR2_AUTO_NEG_ERROR 0x8000 /* 1=Auto-Neg Error */ + +/* PHY Specific Control Register 2 (Page 0, Register 26) */ +#define GG82563_PSCR2_10BT_POLARITY_FORCE 0x0002 /* 1=Force Negative Polarity */ +#define GG82563_PSCR2_1000MB_TEST_SELECT_MASK 0x000C +#define GG82563_PSCR2_1000MB_TEST_SELECT_NORMAL 0x0000 /* 00,01=Normal Operation */ +#define GG82563_PSCR2_1000MB_TEST_SELECT_112NS 0x0008 /* 10=Select 112ns Sequence */ +#define GG82563_PSCR2_1000MB_TEST_SELECT_16NS 0x000C /* 11=Select 16ns Sequence */ +#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 /* 1=Reverse Auto-Negotiation */ +#define GG82563_PSCR2_1000BT_DISABLE 0x4000 /* 1=Disable 1000BASE-T */ +#define GG82563_PSCR2_TRANSMITER_TYPE_MASK 0x8000 +#define GG82563_PSCR2_TRANSMITTER_TYPE_CLASS_B 0x0000 /* 0=Class B */ +#define GG82563_PSCR2_TRANSMITTER_TYPE_CLASS_A 0x8000 /* 1=Class A */ + +/* MAC Specific Control Register (Page 2, Register 21) */ +/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ +#define GG82563_MSCR_TX_CLK_MASK 0x0007 +#define GG82563_MSCR_TX_CLK_10MBPS_2_5MHZ 0x0004 +#define GG82563_MSCR_TX_CLK_100MBPS_25MHZ 0x0005 +#define GG82563_MSCR_TX_CLK_1000MBPS_2_5MHZ 0x0006 +#define GG82563_MSCR_TX_CLK_1000MBPS_25MHZ 0x0007 + +#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ + +/* DSP Distance Register (Page 5, Register 26) */ +#define GG82563_DSPD_CABLE_LENGTH 0x0007 /* 0 = <50M; + 1 = 50-80M; + 2 = 80-110M; + 3 = 110-140M; + 4 = >140M */ + +/* Kumeran Mode Control Register (Page 193, Register 16) */ +#define GG82563_KMCR_PHY_LEDS_EN 0x0020 /* 1=PHY LEDs, 0=Kumeran Inband LEDs */ +#define GG82563_KMCR_FORCE_LINK_UP 0x0040 /* 1=Force Link Up */ +#define GG82563_KMCR_SUPPRESS_SGMII_EPD_EXT 0x0080 +#define GG82563_KMCR_MDIO_BUS_SPEED_SELECT_MASK 0x0400 +#define GG82563_KMCR_MDIO_BUS_SPEED_SELECT 0x0400 /* 1=6.25MHz, 0=0.8MHz */ +#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 + +/* Power Management Control Register (Page 193, Register 20) */ +#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 /* 1=Enalbe SERDES Electrical Idle */ +#define GG82563_PMCR_DISABLE_PORT 0x0002 /* 1=Disable Port */ +#define GG82563_PMCR_DISABLE_SERDES 0x0004 /* 1=Disable SERDES */ +#define GG82563_PMCR_REVERSE_AUTO_NEG 0x0008 /* 1=Enable Reverse Auto-Negotiation */ +#define GG82563_PMCR_DISABLE_1000_NON_D0 0x0010 /* 1=Disable 1000Mbps Auto-Neg in non D0 */ +#define GG82563_PMCR_DISABLE_1000 0x0020 /* 1=Disable 1000Mbps Auto-Neg Always */ +#define GG82563_PMCR_REVERSE_AUTO_NEG_D0A 0x0040 /* 1=Enable D0a Reverse Auto-Negotiation */ +#define GG82563_PMCR_FORCE_POWER_STATE 0x0080 /* 1=Force Power State */ +#define GG82563_PMCR_PROGRAMMED_POWER_STATE_MASK 0x0300 +#define GG82563_PMCR_PROGRAMMED_POWER_STATE_DR 0x0000 /* 00=Dr */ +#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D0U 0x0100 /* 01=D0u */ +#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D0A 0x0200 /* 10=D0a */ +#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D3 0x0300 /* 11=D3 */ + +/* In-Band Control Register (Page 194, Register 18) */ +#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding Use */ + /* Bit definitions for valid PHY IDs. */ /* I = Integrated @@ -2695,6 +2987,7 @@ struct e1000_host_command_info { #define M88E1011_I_REV_4 0x04 #define M88E1111_I_PHY_ID 0x01410CC0 #define L1LXT971A_PHY_ID 0x001378E0 +#define GG82563_E_PHY_ID 0x01410CA0 /* Miscellaneous PHY bit definitions. */ #define PHY_PREAMBLE 0xFFFFFFFF diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 84dcca3..f39de16 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -29,6 +29,23 @@ #include "e1000.h" /* Change Log + * 7.0.33 3-Feb-2006 + * o Added another fix for the pass false carrier bit + * 7.0.32 24-Jan-2006 + * o Need to rebuild with noew version number for the pass false carrier + * fix in e1000_hw.c + * 7.0.30 18-Jan-2006 + * o fixup for tso workaround to disable it for pci-x + * o fix mem leak on 82542 + * o fixes for 10 Mb/s connections and incorrect stats + * 7.0.28 01/06/2006 + * o hardware workaround to only set "speed mode" bit for 1G link. + * 7.0.26 12/23/2005 + * o wake on lan support modified for device ID 10B5 + * o fix dhcp + vlan issue not making it to the iAMT firmware + * 7.0.24 12/9/2005 + * o New hardware support for the Gigabit NIC embedded in the south bridge + * o Fixes to the recycling logic (skb->tail) from IBM LTC * 6.3.9 12/16/2005 * o incorporate fix for recycled skbs from IBM LTC * 6.3.7 11/18/2005 @@ -46,54 +63,8 @@ * rx_buffer_len * 6.3.1 9/19/05 * o Use adapter->tx_timeout_factor in Tx Hung Detect logic - (e1000_clean_tx_irq) + * (e1000_clean_tx_irq) * o Support for 8086:10B5 device (Quad Port) - * 6.2.14 9/15/05 - * o In AMT enabled configurations, set/reset DRV_LOAD bit on interface - * open/close - * 6.2.13 9/14/05 - * o Invoke e1000_check_mng_mode only for 8257x controllers since it - * accesses the FWSM that is not supported in other controllers - * 6.2.12 9/9/05 - * o Add support for device id E1000_DEV_ID_82546GB_QUAD_COPPER - * o set RCTL:SECRC only for controllers newer than 82543. - * o When the n/w interface comes down reset DRV_LOAD bit to notify f/w. - * This code was moved from e1000_remove to e1000_close - * 6.2.10 9/6/05 - * o Fix error in updating RDT in el1000_alloc_rx_buffers[_ps] -- one off. - * o Enable fc by default on 82573 controllers (do not read eeprom) - * o Fix rx_errors statistic not to include missed_packet_count - * o Fix rx_dropped statistic not to include missed_packet_count - (Padraig Brady) - * 6.2.9 8/30/05 - * o Remove call to update statistics from the controller ib e1000_get_stats - * 6.2.8 8/30/05 - * o Improved algorithm for rx buffer allocation/rdt update - * o Flow control watermarks relative to rx PBA size - * o Simplified 'Tx Hung' detect logic - * 6.2.7 8/17/05 - * o Report rx buffer allocation failures and tx timeout counts in stats - * 6.2.6 8/16/05 - * o Implement workaround for controller erratum -- linear non-tso packet - * following a TSO gets written back prematurely - * 6.2.5 8/15/05 - * o Set netdev->tx_queue_len based on link speed/duplex settings. - * o Fix net_stats.rx_fifo_errors <p@draigBrady.com> - * o Do not power off PHY if SoL/IDER session is active - * 6.2.4 8/10/05 - * o Fix loopback test setup/cleanup for 82571/3 controllers - * o Fix parsing of outgoing packets (e1000_transfer_dhcp_info) to treat - * all packets as raw - * o Prevent operations that will cause the PHY to be reset if SoL/IDER - * sessions are active and log a message - * 6.2.2 7/21/05 - * o used fixed size descriptors for all MTU sizes, reduces memory load - * 6.1.2 4/13/05 - * o Fixed ethtool diagnostics - * o Enabled flow control to take default eeprom settings - * o Added stats_lock around e1000_read_phy_reg commands to avoid concurrent - * calls, one from mii_ioctl and other from within update_stats while - * processing MIIREG ioctl. */ char e1000_driver_name[] = "e1000"; @@ -103,7 +74,7 @@ static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; #else #define DRIVERNAPI "-NAPI" #endif -#define DRV_VERSION "6.3.9-k4"DRIVERNAPI +#define DRV_VERSION "7.0.33-k2"DRIVERNAPI char e1000_driver_version[] = DRV_VERSION; static char e1000_copyright[] = "Copyright (c) 1999-2005 Intel Corporation."; @@ -157,32 +128,26 @@ static struct pci_device_id e1000_pci_tbl[] = { INTEL_E1000_ETHERNET_DEVICE(0x108A), INTEL_E1000_ETHERNET_DEVICE(0x108B), INTEL_E1000_ETHERNET_DEVICE(0x108C), + INTEL_E1000_ETHERNET_DEVICE(0x1096), + INTEL_E1000_ETHERNET_DEVICE(0x1098), INTEL_E1000_ETHERNET_DEVICE(0x1099), INTEL_E1000_ETHERNET_DEVICE(0x109A), INTEL_E1000_ETHERNET_DEVICE(0x10B5), + INTEL_E1000_ETHERNET_DEVICE(0x10B9), /* required last entry */ {0,} }; MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); -int e1000_up(struct e1000_adapter *adapter); -void e1000_down(struct e1000_adapter *adapter); -void e1000_reset(struct e1000_adapter *adapter); -int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); -int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); -int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); -void e1000_free_all_tx_resources(struct e1000_adapter *adapter); -void e1000_free_all_rx_resources(struct e1000_adapter *adapter); static int e1000_setup_tx_resources(struct e1000_adapter *adapter, - struct e1000_tx_ring *txdr); + struct e1000_tx_ring *txdr); static int e1000_setup_rx_resources(struct e1000_adapter *adapter, - struct e1000_rx_ring *rxdr); + struct e1000_rx_ring *rxdr); static void e1000_free_tx_resources(struct e1000_adapter *adapter, - struct e1000_tx_ring *tx_ring); + struct e1000_tx_ring *tx_ring); static void e1000_free_rx_resources(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); -void e1000_update_stats(struct e1000_adapter *adapter); + struct e1000_rx_ring *rx_ring); /* Local Function Prototypes */ @@ -191,9 +156,6 @@ static void e1000_exit_module(void); static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent); static void __devexit e1000_remove(struct pci_dev *pdev); static int e1000_alloc_queues(struct e1000_adapter *adapter); -#ifdef CONFIG_E1000_MQ -static void e1000_setup_queue_mapping(struct e1000_adapter *adapter); -#endif static int e1000_sw_init(struct e1000_adapter *adapter); static int e1000_open(struct net_device *netdev); static int e1000_close(struct net_device *netdev); @@ -241,11 +203,10 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); -void e1000_set_ethtool_ops(struct net_device *netdev); static void e1000_enter_82542_rst(struct e1000_adapter *adapter); static void e1000_leave_82542_rst(struct e1000_adapter *adapter); static void e1000_tx_timeout(struct net_device *dev); -static void e1000_tx_timeout_task(struct net_device *dev); +static void e1000_reset_task(struct net_device *dev); static void e1000_smartspeed(struct e1000_adapter *adapter); static inline int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, struct sk_buff *skb); @@ -265,14 +226,6 @@ static int e1000_resume(struct pci_dev *pdev); static void e1000_netpoll (struct net_device *netdev); #endif -#ifdef CONFIG_E1000_MQ -/* for multiple Rx queues */ -void e1000_rx_schedule(void *data); -#endif - -/* Exported from other modules */ - -extern void e1000_check_options(struct e1000_adapter *adapter); static struct pci_driver e1000_driver = { .name = e1000_driver_name, @@ -380,7 +333,8 @@ e1000_update_mng_vlan(struct e1000_adapter *adapter) (vid != old_vid) && !adapter->vlgrp->vlan_devices[old_vid]) e1000_vlan_rx_kill_vid(netdev, old_vid); - } + } else + adapter->mng_vlan_id = vid; } } @@ -502,10 +456,6 @@ e1000_up(struct e1000_adapter *adapter) return err; } -#ifdef CONFIG_E1000_MQ - e1000_setup_queue_mapping(adapter); -#endif - adapter->tx_queue_len = netdev->tx_queue_len; mod_timer(&adapter->watchdog_timer, jiffies); @@ -526,9 +476,7 @@ e1000_down(struct e1000_adapter *adapter) e1000_check_mng_mode(&adapter->hw); e1000_irq_disable(adapter); -#ifdef CONFIG_E1000_MQ - while (atomic_read(&adapter->rx_sched_call_data.count) != 0); -#endif + free_irq(adapter->pdev->irq, netdev); #ifdef CONFIG_PCI_MSI if (adapter->hw.mac_type > e1000_82547_rev_2 && @@ -587,6 +535,7 @@ e1000_reset(struct e1000_adapter *adapter) break; case e1000_82571: case e1000_82572: + case e1000_80003es2lan: pba = E1000_PBA_38K; break; case e1000_82573: @@ -619,7 +568,10 @@ e1000_reset(struct e1000_adapter *adapter) adapter->hw.fc_high_water = fc_high_water_mark; adapter->hw.fc_low_water = fc_high_water_mark - 8; - adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; + if (adapter->hw.mac_type == e1000_80003es2lan) + adapter->hw.fc_pause_time = 0xFFFF; + else + adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; adapter->hw.fc_send_xon = 1; adapter->hw.fc = adapter->hw.original_fc; @@ -663,6 +615,7 @@ e1000_probe(struct pci_dev *pdev, unsigned long mmio_start, mmio_len; static int cards_found = 0; + static int e1000_ksp3_port_a = 0; /* global ksp3 port a indication */ int i, err, pci_using_dac; uint16_t eeprom_data; uint16_t eeprom_apme_mask = E1000_EEPROM_APME; @@ -755,6 +708,15 @@ e1000_probe(struct pci_dev *pdev, if ((err = e1000_check_phy_reset_block(&adapter->hw))) DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); + /* if ksp3, indicate if it's port a being setup */ + if (pdev->device == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 && + e1000_ksp3_port_a == 0) + adapter->ksp3_port_a = 1; + e1000_ksp3_port_a++; + /* Reset for multiple KP3 adapters */ + if (e1000_ksp3_port_a == 4) + e1000_ksp3_port_a = 0; + if (adapter->hw.mac_type >= e1000_82543) { netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | @@ -826,8 +788,8 @@ e1000_probe(struct pci_dev *pdev, adapter->phy_info_timer.function = &e1000_update_phy_info; adapter->phy_info_timer.data = (unsigned long) adapter; - INIT_WORK(&adapter->tx_timeout_task, - (void (*)(void *))e1000_tx_timeout_task, netdev); + INIT_WORK(&adapter->reset_task, + (void (*)(void *))e1000_reset_task, netdev); /* we're going to reset, so assume we have no link for now */ @@ -854,6 +816,7 @@ e1000_probe(struct pci_dev *pdev, case e1000_82546: case e1000_82546_rev_3: case e1000_82571: + case e1000_80003es2lan: if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1){ e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); @@ -972,10 +935,6 @@ e1000_remove(struct pci_dev *pdev) iounmap(adapter->hw.hw_addr); pci_release_regions(pdev); -#ifdef CONFIG_E1000_MQ - free_percpu(adapter->cpu_netdev); - free_percpu(adapter->cpu_tx_ring); -#endif free_netdev(netdev); pci_disable_device(pdev); @@ -1056,40 +1015,8 @@ e1000_sw_init(struct e1000_adapter *adapter) hw->master_slave = E1000_MASTER_SLAVE; } -#ifdef CONFIG_E1000_MQ - /* Number of supported queues */ - switch (hw->mac_type) { - case e1000_82571: - case e1000_82572: - /* These controllers support 2 tx queues, but with a single - * qdisc implementation, multiple tx queues aren't quite as - * interesting. If we can find a logical way of mapping - * flows to a queue, then perhaps we can up the num_tx_queue - * count back to its default. Until then, we run the risk of - * terrible performance due to SACK overload. */ - adapter->num_tx_queues = 1; - adapter->num_rx_queues = 2; - break; - default: - adapter->num_tx_queues = 1; - adapter->num_rx_queues = 1; - break; - } - adapter->num_rx_queues = min(adapter->num_rx_queues, num_online_cpus()); - adapter->num_tx_queues = min(adapter->num_tx_queues, num_online_cpus()); - DPRINTK(DRV, INFO, "Multiqueue Enabled: Rx Queue count = %u %s\n", - adapter->num_rx_queues, - ((adapter->num_rx_queues == 1) - ? ((num_online_cpus() > 1) - ? "(due to unsupported feature in current adapter)" - : "(due to unsupported system configuration)") - : "")); - DPRINTK(DRV, INFO, "Multiqueue Enabled: Tx Queue count = %u\n", - adapter->num_tx_queues); -#else adapter->num_tx_queues = 1; adapter->num_rx_queues = 1; -#endif if (e1000_alloc_queues(adapter)) { DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); @@ -1152,51 +1079,9 @@ e1000_alloc_queues(struct e1000_adapter *adapter) memset(adapter->polling_netdev, 0, size); #endif -#ifdef CONFIG_E1000_MQ - adapter->rx_sched_call_data.func = e1000_rx_schedule; - adapter->rx_sched_call_data.info = adapter->netdev; - - adapter->cpu_netdev = alloc_percpu(struct net_device *); - adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); -#endif - return E1000_SUCCESS; } -#ifdef CONFIG_E1000_MQ -static void __devinit -e1000_setup_queue_mapping(struct e1000_adapter *adapter) -{ - int i, cpu; - - adapter->rx_sched_call_data.func = e1000_rx_schedule; - adapter->rx_sched_call_data.info = adapter->netdev; - cpus_clear(adapter->rx_sched_call_data.cpumask); - - adapter->cpu_netdev = alloc_percpu(struct net_device *); - adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); - - lock_cpu_hotplug(); - i = 0; - for_each_online_cpu(cpu) { - *per_cpu_ptr(adapter->cpu_tx_ring, cpu) = &adapter->tx_ring[i % adapter->num_tx_queues]; - /* This is incomplete because we'd like to assign separate - * physical cpus to these netdev polling structures and - * avoid saturating a subset of cpus. - */ - if (i < adapter->num_rx_queues) { - *per_cpu_ptr(adapter->cpu_netdev, cpu) = &adapter->polling_netdev[i]; - adapter->rx_ring[i].cpu = cpu; - cpu_set(cpu, adapter->cpumask); - } else - *per_cpu_ptr(adapter->cpu_netdev, cpu) = NULL; - - i++; - } - unlock_cpu_hotplug(); -} -#endif - /** * e1000_open - Called when a network interface is made active * @netdev: network interface device structure @@ -1435,18 +1320,6 @@ e1000_configure_tx(struct e1000_adapter *adapter) /* Setup the HW Tx Head and Tail descriptor pointers */ switch (adapter->num_tx_queues) { - case 2: - tdba = adapter->tx_ring[1].dma; - tdlen = adapter->tx_ring[1].count * - sizeof(struct e1000_tx_desc); - E1000_WRITE_REG(hw, TDBAL1, (tdba & 0x00000000ffffffffULL)); - E1000_WRITE_REG(hw, TDBAH1, (tdba >> 32)); - E1000_WRITE_REG(hw, TDLEN1, tdlen); - E1000_WRITE_REG(hw, TDH1, 0); - E1000_WRITE_REG(hw, TDT1, 0); - adapter->tx_ring[1].tdh = E1000_TDH1; - adapter->tx_ring[1].tdt = E1000_TDT1; - /* Fall Through */ case 1: default: tdba = adapter->tx_ring[0].dma; @@ -1477,6 +1350,10 @@ e1000_configure_tx(struct e1000_adapter *adapter) ipgr1 = DEFAULT_82542_TIPG_IPGR1; ipgr2 = DEFAULT_82542_TIPG_IPGR2; break; + case e1000_80003es2lan: + ipgr1 = DEFAULT_82543_TIPG_IPGR1; + ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; + break; default: ipgr1 = DEFAULT_82543_TIPG_IPGR1; ipgr2 = DEFAULT_82543_TIPG_IPGR2; @@ -1497,10 +1374,13 @@ e1000_configure_tx(struct e1000_adapter *adapter) tctl = E1000_READ_REG(hw, TCTL); tctl &= ~E1000_TCTL_CT; - tctl |= E1000_TCTL_EN | E1000_TCTL_PSP | E1000_TCTL_RTLC | + tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); - E1000_WRITE_REG(hw, TCTL, tctl); +#ifdef DISABLE_MULR + /* disable Multiple Reads for debugging */ + tctl &= ~E1000_TCTL_MULR; +#endif if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) { tarc = E1000_READ_REG(hw, TARC0); @@ -1513,6 +1393,15 @@ e1000_configure_tx(struct e1000_adapter *adapter) else tarc |= (1 << 28); E1000_WRITE_REG(hw, TARC1, tarc); + } else if (hw->mac_type == e1000_80003es2lan) { + tarc = E1000_READ_REG(hw, TARC0); + tarc |= 1; + if (hw->media_type == e1000_media_type_internal_serdes) + tarc |= (1 << 20); + E1000_WRITE_REG(hw, TARC0, tarc); + tarc = E1000_READ_REG(hw, TARC1); + tarc |= 1; + E1000_WRITE_REG(hw, TARC1, tarc); } e1000_config_collision_dist(hw); @@ -1531,6 +1420,9 @@ e1000_configure_tx(struct e1000_adapter *adapter) if (hw->mac_type == e1000_82544 && hw->bus_type == e1000_bus_type_pcix) adapter->pcix_82544 = 1; + + E1000_WRITE_REG(hw, TCTL, tctl); + } /** @@ -1790,12 +1682,9 @@ e1000_configure_rx(struct e1000_adapter *adapter) uint64_t rdba; struct e1000_hw *hw = &adapter->hw; uint32_t rdlen, rctl, rxcsum, ctrl_ext; -#ifdef CONFIG_E1000_MQ - uint32_t reta, mrqc; - int i; -#endif if (adapter->rx_ps_pages) { + /* this is a 32 byte descriptor */ rdlen = adapter->rx_ring[0].count * sizeof(union e1000_rx_desc_packet_split); adapter->clean_rx = e1000_clean_rx_irq_ps; @@ -1837,18 +1726,6 @@ e1000_configure_rx(struct e1000_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ switch (adapter->num_rx_queues) { -#ifdef CONFIG_E1000_MQ - case 2: - rdba = adapter->rx_ring[1].dma; - E1000_WRITE_REG(hw, RDBAL1, (rdba & 0x00000000ffffffffULL)); - E1000_WRITE_REG(hw, RDBAH1, (rdba >> 32)); - E1000_WRITE_REG(hw, RDLEN1, rdlen); - E1000_WRITE_REG(hw, RDH1, 0); - E1000_WRITE_REG(hw, RDT1, 0); - adapter->rx_ring[1].rdh = E1000_RDH1; - adapter->rx_ring[1].rdt = E1000_RDT1; - /* Fall Through */ -#endif case 1: default: rdba = adapter->rx_ring[0].dma; @@ -1862,46 +1739,6 @@ e1000_configure_rx(struct e1000_adapter *adapter) break; } -#ifdef CONFIG_E1000_MQ - if (adapter->num_rx_queues > 1) { - uint32_t random[10]; - - get_random_bytes(&random[0], 40); - - if (hw->mac_type <= e1000_82572) { - E1000_WRITE_REG(hw, RSSIR, 0); - E1000_WRITE_REG(hw, RSSIM, 0); - } - - switch (adapter->num_rx_queues) { - case 2: - default: - reta = 0x00800080; - mrqc = E1000_MRQC_ENABLE_RSS_2Q; - break; - } - - /* Fill out redirection table */ - for (i = 0; i < 32; i++) - E1000_WRITE_REG_ARRAY(hw, RETA, i, reta); - /* Fill out hash function seeds */ - for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, RSSRK, i, random[i]); - - mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP); - E1000_WRITE_REG(hw, MRQC, mrqc); - } - - /* Multiqueue and packet checksumming are mutually exclusive. */ - if (hw->mac_type >= e1000_82571) { - rxcsum = E1000_READ_REG(hw, RXCSUM); - rxcsum |= E1000_RXCSUM_PCSD; - E1000_WRITE_REG(hw, RXCSUM, rxcsum); - } - -#else - /* Enable 82543 Receive Checksum Offload for TCP and UDP */ if (hw->mac_type >= e1000_82543) { rxcsum = E1000_READ_REG(hw, RXCSUM); @@ -1920,7 +1757,6 @@ e1000_configure_rx(struct e1000_adapter *adapter) } E1000_WRITE_REG(hw, RXCSUM, rxcsum); } -#endif /* CONFIG_E1000_MQ */ if (hw->mac_type == e1000_82573) E1000_WRITE_REG(hw, ERT, 0x0100); @@ -2392,7 +2228,7 @@ e1000_watchdog_task(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct e1000_tx_ring *txdr = adapter->tx_ring; - uint32_t link; + uint32_t link, tctl; e1000_check_for_link(&adapter->hw); if (adapter->hw.mac_type == e1000_82573) { @@ -2418,20 +2254,61 @@ e1000_watchdog_task(struct e1000_adapter *adapter) adapter->link_duplex == FULL_DUPLEX ? "Full Duplex" : "Half Duplex"); - /* tweak tx_queue_len according to speed/duplex */ + /* tweak tx_queue_len according to speed/duplex + * and adjust the timeout factor */ netdev->tx_queue_len = adapter->tx_queue_len; adapter->tx_timeout_factor = 1; - if (adapter->link_duplex == HALF_DUPLEX) { + adapter->txb2b = 1; + switch (adapter->link_speed) { + case SPEED_10: + adapter->txb2b = 0; + netdev->tx_queue_len = 10; + adapter->tx_timeout_factor = 8; + break; + case SPEED_100: + adapter->txb2b = 0; + netdev->tx_queue_len = 100; + /* maybe add some timeout factor ? */ + break; + } + + if ((adapter->hw.mac_type == e1000_82571 || + adapter->hw.mac_type == e1000_82572) && + adapter->txb2b == 0) { +#define SPEED_MODE_BIT (1 << 21) + uint32_t tarc0; + tarc0 = E1000_READ_REG(&adapter->hw, TARC0); + tarc0 &= ~SPEED_MODE_BIT; + E1000_WRITE_REG(&adapter->hw, TARC0, tarc0); + } + +#ifdef NETIF_F_TSO + /* disable TSO for pcie and 10/100 speeds, to avoid + * some hardware issues */ + if (!adapter->tso_force && + adapter->hw.bus_type == e1000_bus_type_pci_express){ switch (adapter->link_speed) { case SPEED_10: - netdev->tx_queue_len = 10; - adapter->tx_timeout_factor = 8; - break; case SPEED_100: - netdev->tx_queue_len = 100; + DPRINTK(PROBE,INFO, + "10/100 speed: disabling TSO\n"); + netdev->features &= ~NETIF_F_TSO; + break; + case SPEED_1000: + netdev->features |= NETIF_F_TSO; + break; + default: + /* oops */ break; } } +#endif + + /* enable transmits in the hardware, need to do this + * after setting TARC0 */ + tctl = E1000_READ_REG(&adapter->hw, TCTL); + tctl |= E1000_TCTL_EN; + E1000_WRITE_REG(&adapter->hw, TCTL, tctl); netif_carrier_on(netdev); netif_wake_queue(netdev); @@ -2446,6 +2323,16 @@ e1000_watchdog_task(struct e1000_adapter *adapter) netif_carrier_off(netdev); netif_stop_queue(netdev); mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ); + + /* 80003ES2LAN workaround-- + * For packet buffer work-around on link down event; + * disable receives in the ISR and + * reset device here in the watchdog + */ + if (adapter->hw.mac_type == e1000_80003es2lan) { + /* reset device */ + schedule_work(&adapter->reset_task); + } } e1000_smartspeed(adapter); @@ -2465,16 +2352,14 @@ e1000_watchdog_task(struct e1000_adapter *adapter) e1000_update_adaptive(&adapter->hw); -#ifdef CONFIG_E1000_MQ - txdr = *per_cpu_ptr(adapter->cpu_tx_ring, smp_processor_id()); -#endif if (!netif_carrier_ok(netdev)) { if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). */ - schedule_work(&adapter->tx_timeout_task); + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); } } @@ -2649,9 +2534,9 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, /* Workaround for Controller erratum -- * descriptor for non-tso packet in a linear SKB that follows a * tso gets written back prematurely before the data is fully - * DMAd to the controller */ + * DMA'd to the controller */ if (!skb->data_len && tx_ring->last_tx_tso && - !skb_shinfo(skb)->tso_size) { + !skb_shinfo(skb)->tso_size) { tx_ring->last_tx_tso = 0; size -= 4; } @@ -2840,7 +2725,7 @@ e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb) E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) ) return 0; } - if ((skb->len > MINIMUM_DHCP_PACKET_SIZE) && (!skb->protocol)) { + if (skb->len > MINIMUM_DHCP_PACKET_SIZE) { struct ethhdr *eth = (struct ethhdr *) skb->data; if ((htons(ETH_P_IP) == eth->h_proto)) { const struct iphdr *ip = @@ -2881,11 +2766,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) unsigned int f; len -= skb->data_len; -#ifdef CONFIG_E1000_MQ - tx_ring = *per_cpu_ptr(adapter->cpu_tx_ring, smp_processor_id()); -#else tx_ring = adapter->tx_ring; -#endif if (unlikely(skb->len <= 0)) { dev_kfree_skb_any(skb); @@ -2905,21 +2786,29 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) max_per_txd = min(mss << 2, max_per_txd); max_txd_pwr = fls(max_per_txd) - 1; - /* TSO Workaround for 82571/2 Controllers -- if skb->data + /* TSO Workaround for 82571/2/3 Controllers -- if skb->data * points to just header, pull a few bytes of payload from * frags into skb->data */ hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - if (skb->data_len && (hdr_len == (skb->len - skb->data_len)) && - (adapter->hw.mac_type == e1000_82571 || - adapter->hw.mac_type == e1000_82572)) { - unsigned int pull_size; - pull_size = min((unsigned int)4, skb->data_len); - if (!__pskb_pull_tail(skb, pull_size)) { - printk(KERN_ERR "__pskb_pull_tail failed.\n"); - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { + switch (adapter->hw.mac_type) { + unsigned int pull_size; + case e1000_82571: + case e1000_82572: + case e1000_82573: + pull_size = min((unsigned int)4, skb->data_len); + if (!__pskb_pull_tail(skb, pull_size)) { + printk(KERN_ERR + "__pskb_pull_tail failed.\n"); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + len = skb->len - skb->data_len; + break; + default: + /* do nothing */ + break; } - len = skb->len - skb->data_len; } } @@ -2935,7 +2824,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) #ifdef NETIF_F_TSO /* Controller Erratum workaround */ if (!skb->data_len && tx_ring->last_tx_tso && - !skb_shinfo(skb)->tso_size) + !skb_shinfo(skb)->tso_size) count++; #endif @@ -2958,7 +2847,9 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (adapter->pcix_82544) count += nr_frags; - if (adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) + + if (adapter->hw.tx_pkt_filtering && + (adapter->hw.mac_type == e1000_82573)) e1000_transfer_dhcp_info(adapter, skb); local_irq_save(flags); @@ -3036,15 +2927,15 @@ e1000_tx_timeout(struct net_device *netdev) struct e1000_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ - schedule_work(&adapter->tx_timeout_task); + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); } static void -e1000_tx_timeout_task(struct net_device *netdev) +e1000_reset_task(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); - adapter->tx_timeout_count++; e1000_down(adapter); e1000_up(adapter); } @@ -3079,6 +2970,7 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; + uint16_t eeprom_data = 0; if ((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { @@ -3090,14 +2982,28 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) switch (adapter->hw.mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: - case e1000_82573: if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) { DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); return -EINVAL; } break; + case e1000_82573: + /* only enable jumbo frames if ASPM is disabled completely + * this means both bits must be zero in 0x1A bits 3:2 */ + e1000_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1, + &eeprom_data); + if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) { + if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) { + DPRINTK(PROBE, ERR, + "Jumbo Frames not supported.\n"); + return -EINVAL; + } + break; + } + /* fall through to get support */ case e1000_82571: case e1000_82572: + case e1000_80003es2lan: #define MAX_STD_JUMBO_FRAME_SIZE 9234 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { DPRINTK(PROBE, ERR, "MTU > 9216 not supported.\n"); @@ -3251,11 +3157,15 @@ e1000_update_stats(struct e1000_adapter *adapter) /* Rx Errors */ + /* RLEC on some newer hardware can be incorrect so build + * our own version based on RUC and ROC */ adapter->net_stats.rx_errors = adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.rlec + adapter->stats.cexterr; + adapter->stats.ruc + adapter->stats.roc + + adapter->stats.cexterr; adapter->net_stats.rx_dropped = 0; - adapter->net_stats.rx_length_errors = adapter->stats.rlec; + adapter->net_stats.rx_length_errors = adapter->stats.ruc + + adapter->stats.roc; adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs; adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc; adapter->net_stats.rx_missed_errors = adapter->stats.mpc; @@ -3288,29 +3198,6 @@ e1000_update_stats(struct e1000_adapter *adapter) spin_unlock_irqrestore(&adapter->stats_lock, flags); } -#ifdef CONFIG_E1000_MQ -void -e1000_rx_schedule(void *data) -{ - struct net_device *poll_dev, *netdev = data; - struct e1000_adapter *adapter = netdev->priv; - int this_cpu = get_cpu(); - - poll_dev = *per_cpu_ptr(adapter->cpu_netdev, this_cpu); - if (poll_dev == NULL) { - put_cpu(); - return; - } - - if (likely(netif_rx_schedule_prep(poll_dev))) - __netif_rx_schedule(poll_dev); - else - e1000_irq_enable(adapter); - - put_cpu(); -} -#endif - /** * e1000_intr - Interrupt Handler * @irq: interrupt number @@ -3324,7 +3211,7 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - uint32_t icr = E1000_READ_REG(hw, ICR); + uint32_t rctl, icr = E1000_READ_REG(hw, ICR); #ifndef CONFIG_E1000_NAPI int i; #else @@ -3346,6 +3233,17 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { hw->get_link_status = 1; + /* 80003ES2LAN workaround-- + * For packet buffer work-around on link down event; + * disable receives here in the ISR and + * reset adapter in watchdog + */ + if (netif_carrier_ok(netdev) && + (adapter->hw.mac_type == e1000_80003es2lan)) { + /* disable receives */ + rctl = E1000_READ_REG(hw, RCTL); + E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); + } mod_timer(&adapter->watchdog_timer, jiffies); } @@ -3355,26 +3253,11 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) E1000_WRITE_REG(hw, IMC, ~0); E1000_WRITE_FLUSH(hw); } -#ifdef CONFIG_E1000_MQ - if (atomic_read(&adapter->rx_sched_call_data.count) == 0) { - /* We must setup the cpumask once count == 0 since - * each cpu bit is cleared when the work is done. */ - adapter->rx_sched_call_data.cpumask = adapter->cpumask; - atomic_add(adapter->num_rx_queues - 1, &adapter->irq_sem); - atomic_set(&adapter->rx_sched_call_data.count, - adapter->num_rx_queues); - smp_call_async_mask(&adapter->rx_sched_call_data); - } else { - printk("call_data.count == %u\n", atomic_read(&adapter->rx_sched_call_data.count)); - } -#else /* if !CONFIG_E1000_MQ */ if (likely(netif_rx_schedule_prep(&adapter->polling_netdev[0]))) __netif_rx_schedule(&adapter->polling_netdev[0]); else e1000_irq_enable(adapter); -#endif /* CONFIG_E1000_MQ */ - -#else /* if !CONFIG_E1000_NAPI */ +#else /* Writing IMC and IMS is needed for 82547. * Due to Hub Link bus being occupied, an interrupt * de-assertion message is not able to be sent. @@ -3398,7 +3281,7 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) e1000_irq_enable(adapter); -#endif /* CONFIG_E1000_NAPI */ +#endif return IRQ_HANDLED; } @@ -3474,6 +3357,9 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_desc *tx_desc, *eop_desc; struct e1000_buffer *buffer_info; unsigned int i, eop; +#ifdef CONFIG_E1000_NAPI + unsigned int count = 0; +#endif boolean_t cleaned = FALSE; i = tx_ring->next_to_clean; @@ -3486,21 +3372,20 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, buffer_info = &tx_ring->buffer_info[i]; cleaned = (i == eop); -#ifdef CONFIG_E1000_MQ - tx_ring->tx_stats.bytes += buffer_info->length; -#endif e1000_unmap_and_free_tx_resource(adapter, buffer_info); memset(tx_desc, 0, sizeof(struct e1000_tx_desc)); if (unlikely(++i == tx_ring->count)) i = 0; } -#ifdef CONFIG_E1000_MQ - tx_ring->tx_stats.packets++; -#endif eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); +#ifdef CONFIG_E1000_NAPI +#define E1000_TX_WEIGHT 64 + /* weight of a sort for tx, to avoid endless transmit cleanup */ + if (count++ == E1000_TX_WEIGHT) break; +#endif } tx_ring->next_to_clean = i; @@ -3519,7 +3404,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, adapter->detect_tx_hung = FALSE; if (tx_ring->buffer_info[eop].dma && time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + - adapter->tx_timeout_factor * HZ) + (adapter->tx_timeout_factor * HZ)) && !(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF)) { @@ -3644,10 +3529,15 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, skb = buffer_info->skb; buffer_info->skb = NULL; + prefetch(skb->data - NET_IP_ALIGN); + if (++i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC(*rx_ring, i); + prefetch(next_rxd); + next_buffer = &rx_ring->buffer_info[i]; next_skb = next_buffer->skb; + prefetch(next_skb->data - NET_IP_ALIGN); cleaned = TRUE; cleaned_count++; @@ -3733,10 +3623,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, } #endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; -#ifdef CONFIG_E1000_MQ - rx_ring->rx_stats.packets++; - rx_ring->rx_stats.bytes += length; -#endif next_desc: rx_desc->status = 0; @@ -3747,6 +3633,7 @@ next_desc: cleaned_count = 0; } + /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; } @@ -3789,9 +3676,9 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC_PS(*rx_ring, i); staterr = le32_to_cpu(rx_desc->wb.middle.status_error); - buffer_info = &rx_ring->buffer_info[i]; while (staterr & E1000_RXD_STAT_DD) { + buffer_info = &rx_ring->buffer_info[i]; ps_page = &rx_ring->ps_page[i]; ps_page_dma = &rx_ring->ps_page_dma[i]; #ifdef CONFIG_E1000_NAPI @@ -3801,10 +3688,16 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, #endif skb = buffer_info->skb; + /* in the packet split case this is header only */ + prefetch(skb->data - NET_IP_ALIGN); + if (++i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC_PS(*rx_ring, i); + prefetch(next_rxd); + next_buffer = &rx_ring->buffer_info[i]; next_skb = next_buffer->skb; + prefetch(next_skb->data - NET_IP_ALIGN); cleaned = TRUE; cleaned_count++; @@ -3836,23 +3729,49 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, /* Good Receive */ skb_put(skb, length); + { + /* this looks ugly, but it seems compiler issues make it + more efficient than reusing j */ + int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]); + + /* page alloc/put takes too long and effects small packet + * throughput, so unsplit small packets and save the alloc/put*/ + if (l1 && ((length + l1) < E1000_CB_LENGTH)) { + u8 *vaddr; + /* there is no documentation about how to call + * kmap_atomic, so we can't hold the mapping + * very long */ + pci_dma_sync_single_for_cpu(pdev, + ps_page_dma->ps_page_dma[0], + PAGE_SIZE, + PCI_DMA_FROMDEVICE); + vaddr = kmap_atomic(ps_page->ps_page[0], + KM_SKB_DATA_SOFTIRQ); + memcpy(skb->tail, vaddr, l1); + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); + pci_dma_sync_single_for_device(pdev, + ps_page_dma->ps_page_dma[0], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + skb_put(skb, l1); + length += l1; + goto copydone; + } /* if */ + } + for (j = 0; j < adapter->rx_ps_pages; j++) { - if (!(length = le16_to_cpu(rx_desc->wb.upper.length[j]))) + if (!(length= le16_to_cpu(rx_desc->wb.upper.length[j]))) break; - pci_unmap_page(pdev, ps_page_dma->ps_page_dma[j], PAGE_SIZE, PCI_DMA_FROMDEVICE); ps_page_dma->ps_page_dma[j] = 0; - skb_shinfo(skb)->frags[j].page = - ps_page->ps_page[j]; + skb_fill_page_desc(skb, j, ps_page->ps_page[j], 0, + length); ps_page->ps_page[j] = NULL; - skb_shinfo(skb)->frags[j].page_offset = 0; - skb_shinfo(skb)->frags[j].size = length; - skb_shinfo(skb)->nr_frags++; skb->len += length; skb->data_len += length; } +copydone: e1000_rx_checksum(adapter, staterr, le16_to_cpu(rx_desc->wb.lower.hi_dword.csum_ip.csum), skb); skb->protocol = eth_type_trans(skb, netdev); @@ -3878,10 +3797,6 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, } #endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; -#ifdef CONFIG_E1000_MQ - rx_ring->rx_stats.packets++; - rx_ring->rx_stats.bytes += length; -#endif next_desc: rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF); @@ -3893,6 +3808,7 @@ next_desc: cleaned_count = 0; } + /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; @@ -3936,7 +3852,6 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, goto map_skb; } - if (unlikely(!skb)) { /* Better luck next round */ adapter->alloc_rx_buff_failed++; @@ -4242,7 +4157,7 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) spin_unlock_irqrestore(&adapter->stats_lock, flags); return -EIO; } - if (adapter->hw.phy_type == e1000_phy_m88) { + if (adapter->hw.phy_type == e1000_media_type_copper) { switch (data->reg_num) { case PHY_CTRL: if (mii_reg & MII_CR_POWER_DOWN) @@ -4258,8 +4173,8 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) else spddplx = SPEED_10; spddplx += (mii_reg & 0x100) - ? FULL_DUPLEX : - HALF_DUPLEX; + ? DUPLEX_FULL : + DUPLEX_HALF; retval = e1000_set_spd_dplx(adapter, spddplx); if (retval) { @@ -4489,8 +4404,8 @@ e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx) } #ifdef CONFIG_PM -/* these functions save and restore 16 or 64 dwords (64-256 bytes) of config - * space versus the 64 bytes that pci_[save|restore]_state handle +/* Save/restore 16 or 64 dwords of PCI config space depending on which + * bus we're on (PCI(X) vs. PCI-E) */ #define PCIE_CONFIG_SPACE_LEN 256 #define PCI_CONFIG_SPACE_LEN 64 @@ -4500,6 +4415,7 @@ e1000_pci_save_state(struct e1000_adapter *adapter) struct pci_dev *dev = adapter->pdev; int size; int i; + if (adapter->hw.mac_type >= e1000_82571) size = PCIE_CONFIG_SPACE_LEN; else @@ -4523,8 +4439,10 @@ e1000_pci_restore_state(struct e1000_adapter *adapter) struct pci_dev *dev = adapter->pdev; int size; int i; + if (adapter->config_space == NULL) return; + if (adapter->hw.mac_type >= e1000_82571) size = PCIE_CONFIG_SPACE_LEN; else @@ -4552,8 +4470,8 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state) e1000_down(adapter); #ifdef CONFIG_PM - /* implement our own version of pci_save_state(pdev) because pci - * express adapters have larger 256 byte config spaces */ + /* Implement our own version of pci_save_state(pdev) because pci- + * express adapters have 256-byte config spaces. */ retval = e1000_pci_save_state(adapter); if (retval) return retval; @@ -4610,7 +4528,7 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state) retval = pci_enable_wake(pdev, PCI_D3hot, 0); if (retval) DPRINTK(PROBE, ERR, "Error enabling D3 wake\n"); - retval = pci_enable_wake(pdev, PCI_D3cold, 0); /* 4 == D3 cold */ + retval = pci_enable_wake(pdev, PCI_D3cold, 0); if (retval) DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n"); } @@ -4626,7 +4544,8 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state) DPRINTK(PROBE, ERR, "Error enabling D3 wake\n"); retval = pci_enable_wake(pdev, PCI_D3cold, 1); if (retval) - DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n"); + DPRINTK(PROBE, ERR, + "Error enabling D3 cold wake\n"); } } diff --git a/drivers/net/e1000/e1000_param.c b/drivers/net/e1000/e1000_param.c index 3768d83..e0a4d37 100644 --- a/drivers/net/e1000/e1000_param.c +++ b/drivers/net/e1000/e1000_param.c @@ -268,7 +268,7 @@ e1000_validate_option(int *value, struct e1000_option *opt, BUG(); } - DPRINTK(PROBE, INFO, "Invalid %s specified (%i) %s\n", + DPRINTK(PROBE, INFO, "Invalid %s value specified (%i) %s\n", opt->name, *value, opt->err); *value = opt->def; return -1; diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c index 8c62ced..467fc86 100644 --- a/drivers/net/eepro100.c +++ b/drivers/net/eepro100.c @@ -27,7 +27,7 @@ rx_align support: enables rx DMA without causing unaligned accesses. */ -static const char *version = +static const char * const version = "eepro100.c:v1.09j-t 9/29/99 Donald Becker http://www.scyld.com/network/eepro100.html\n" "eepro100.c: $Revision: 1.36 $ 2000/11/17 Modified by Andrey V. Savochkin <saw@saw.sw.com.sg> and others\n"; @@ -469,7 +469,7 @@ static const char i82558_config_cmd[CONFIG_DATA_SIZE] = { 0x31, 0x05, }; /* PHY media interface chips. */ -static const char *phys[] = { +static const char * const phys[] = { "None", "i82553-A/B", "i82553-C", "i82503", "DP83840", "80c240", "80c24", "i82555", "unknown-8", "unknown-9", "DP83840A", "unknown-11", diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index f119ec4..2f7b868 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -225,7 +225,7 @@ struct epic_chip_info { /* indexed by chip_t */ -static struct epic_chip_info pci_id_tbl[] = { +static const struct epic_chip_info pci_id_tbl[] = { { "SMSC EPIC/100 83c170", EPIC_IOTYPE, EPIC_TOTAL_SIZE, TYPE2_INTR | NO_MII | MII_PWRDWN }, { "SMSC EPIC/100 83c170", @@ -291,7 +291,7 @@ enum CommandBits { RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull) #define EpicNormalEvent (0x0000ffff & ~EpicNapiEvent) -static u16 media2miictl[16] = { +static const u16 media2miictl[16] = { 0, 0x0C00, 0x0C00, 0x2000, 0x0100, 0x2100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index f32a6b3..b67545b 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -161,6 +161,7 @@ static char *version = #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/io.h> @@ -754,7 +755,7 @@ static void eth16i_set_port(int ioaddr, int porttype) static int eth16i_send_probe_packet(int ioaddr, unsigned char *b, int l) { - int starttime; + unsigned long starttime; outb(0xff, ioaddr + TX_STATUS_REG); @@ -765,7 +766,7 @@ static int eth16i_send_probe_packet(int ioaddr, unsigned char *b, int l) outb(TX_START | 1, ioaddr + TRANSMIT_START_REG); while( (inb(ioaddr + TX_STATUS_REG) & 0x80) == 0) { - if( (jiffies - starttime) > TX_TIMEOUT) { + if( time_after(jiffies, starttime + TX_TIMEOUT)) { return -1; } } @@ -775,18 +776,18 @@ static int eth16i_send_probe_packet(int ioaddr, unsigned char *b, int l) static int eth16i_receive_probe_packet(int ioaddr) { - int starttime; + unsigned long starttime; starttime = jiffies; while((inb(ioaddr + TX_STATUS_REG) & 0x20) == 0) { - if( (jiffies - starttime) > TX_TIMEOUT) { + if( time_after(jiffies, starttime + TX_TIMEOUT)) { if(eth16i_debug > 1) printk(KERN_DEBUG "Timeout occurred waiting transmit packet received\n"); starttime = jiffies; while((inb(ioaddr + RX_STATUS_REG) & 0x80) == 0) { - if( (jiffies - starttime) > TX_TIMEOUT) { + if( time_after(jiffies, starttime + TX_TIMEOUT)) { if(eth16i_debug > 1) printk(KERN_DEBUG "Timeout occurred waiting receive packet\n"); return -1; diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index 55dbe9a..a844926 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -160,7 +160,7 @@ struct chip_info { int flags; }; -static struct chip_info skel_netdrv_tbl[] = { +static const struct chip_info skel_netdrv_tbl[] = { {"100/10M Ethernet PCI Adapter", 136, HAS_MII_XCVR}, {"100/10M Ethernet PCI Adapter", 136, HAS_CHIP_XCVR}, {"1000/100/10M Ethernet PCI Adapter", 136, HAS_MII_XCVR}, diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 3682ec6..e7fc28b 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -102,6 +102,9 @@ * 0.47: 26 Oct 2005: Add phyaddr 0 in phy scan. * 0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single * 0.49: 10 Dec 2005: Fix tso for large buffers. + * 0.50: 20 Jan 2006: Add 8021pq tagging support. + * 0.51: 20 Jan 2006: Add 64bit consistent memory allocation for rings. + * 0.52: 20 Jan 2006: Add MSI/MSIX support. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -113,7 +116,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. */ -#define FORCEDETH_VERSION "0.49" +#define FORCEDETH_VERSION "0.52" #define DRV_NAME "forcedeth" #include <linux/module.h> @@ -153,6 +156,9 @@ #define DEV_HAS_LARGEDESC 0x0004 /* device supports jumbo frames and needs packet format 2 */ #define DEV_HAS_HIGH_DMA 0x0008 /* device supports 64bit dma */ #define DEV_HAS_CHECKSUM 0x0010 /* device supports tx and rx checksum offloads */ +#define DEV_HAS_VLAN 0x0020 /* device supports vlan tagging and striping */ +#define DEV_HAS_MSI 0x0040 /* device supports MSI */ +#define DEV_HAS_MSI_X 0x0080 /* device supports MSI-X */ enum { NvRegIrqStatus = 0x000, @@ -166,14 +172,17 @@ enum { #define NVREG_IRQ_TX_OK 0x0010 #define NVREG_IRQ_TIMER 0x0020 #define NVREG_IRQ_LINK 0x0040 -#define NVREG_IRQ_TX_ERROR 0x0080 -#define NVREG_IRQ_TX1 0x0100 +#define NVREG_IRQ_RX_FORCED 0x0080 +#define NVREG_IRQ_TX_FORCED 0x0100 #define NVREG_IRQMASK_THROUGHPUT 0x00df #define NVREG_IRQMASK_CPU 0x0040 +#define NVREG_IRQ_TX_ALL (NVREG_IRQ_TX_ERR|NVREG_IRQ_TX_OK|NVREG_IRQ_TX_FORCED) +#define NVREG_IRQ_RX_ALL (NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_RX_FORCED) +#define NVREG_IRQ_OTHER (NVREG_IRQ_TIMER|NVREG_IRQ_LINK) #define NVREG_IRQ_UNKNOWN (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR| \ - NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX_ERROR| \ - NVREG_IRQ_TX1)) + NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_RX_FORCED| \ + NVREG_IRQ_TX_FORCED)) NvRegUnknownSetupReg6 = 0x008, #define NVREG_UNKSETUP6_VAL 3 @@ -185,6 +194,10 @@ enum { NvRegPollingInterval = 0x00c, #define NVREG_POLL_DEFAULT_THROUGHPUT 970 #define NVREG_POLL_DEFAULT_CPU 13 + NvRegMSIMap0 = 0x020, + NvRegMSIMap1 = 0x024, + NvRegMSIIrqMask = 0x030, +#define NVREG_MSI_VECTOR_0_ENABLED 0x01 NvRegMisc1 = 0x080, #define NVREG_MISC1_HD 0x02 #define NVREG_MISC1_FORCE 0x3b0f3c @@ -254,6 +267,10 @@ enum { #define NVREG_TXRXCTL_DESC_1 0 #define NVREG_TXRXCTL_DESC_2 0x02100 #define NVREG_TXRXCTL_DESC_3 0x02200 +#define NVREG_TXRXCTL_VLANSTRIP 0x00040 +#define NVREG_TXRXCTL_VLANINS 0x00080 + NvRegTxRingPhysAddrHigh = 0x148, + NvRegRxRingPhysAddrHigh = 0x14C, NvRegMIIStatus = 0x180, #define NVREG_MIISTAT_ERROR 0x0001 #define NVREG_MIISTAT_LINKCHANGE 0x0008 @@ -303,6 +320,11 @@ enum { #define NVREG_POWERSTATE_D1 0x0001 #define NVREG_POWERSTATE_D2 0x0002 #define NVREG_POWERSTATE_D3 0x0003 + NvRegVlanControl = 0x300, +#define NVREG_VLANCONTROL_ENABLE 0x2000 + NvRegMSIXMap0 = 0x3e0, + NvRegMSIXMap1 = 0x3e4, + NvRegMSIXIrqStatus = 0x3f0, }; /* Big endian: should work, but is untested */ @@ -314,7 +336,7 @@ struct ring_desc { struct ring_desc_ex { u32 PacketBufferHigh; u32 PacketBufferLow; - u32 Reserved; + u32 TxVlan; u32 FlagLen; }; @@ -355,6 +377,8 @@ typedef union _ring_type { #define NV_TX2_CHECKSUM_L3 (1<<27) #define NV_TX2_CHECKSUM_L4 (1<<26) +#define NV_TX3_VLAN_TAG_PRESENT (1<<18) + #define NV_RX_DESCRIPTORVALID (1<<16) #define NV_RX_MISSEDFRAME (1<<17) #define NV_RX_SUBSTRACT1 (1<<18) @@ -385,6 +409,9 @@ typedef union _ring_type { #define NV_RX2_ERROR (1<<30) #define NV_RX2_AVAIL (1<<31) +#define NV_RX3_VLAN_TAG_PRESENT (1<<16) +#define NV_RX3_VLAN_TAG_MASK (0x0000FFFF) + /* Miscelaneous hardware related defines: */ #define NV_PCI_REGSZ 0x270 @@ -475,6 +502,18 @@ typedef union _ring_type { #define LPA_1000FULL 0x0800 #define LPA_1000HALF 0x0400 +/* MSI/MSI-X defines */ +#define NV_MSI_X_MAX_VECTORS 8 +#define NV_MSI_X_VECTORS_MASK 0x000f +#define NV_MSI_CAPABLE 0x0010 +#define NV_MSI_X_CAPABLE 0x0020 +#define NV_MSI_ENABLED 0x0040 +#define NV_MSI_X_ENABLED 0x0080 + +#define NV_MSI_X_VECTOR_ALL 0x0 +#define NV_MSI_X_VECTOR_RX 0x0 +#define NV_MSI_X_VECTOR_TX 0x1 +#define NV_MSI_X_VECTOR_OTHER 0x2 /* * SMP locking: @@ -511,6 +550,7 @@ struct fe_priv { u32 irqmask; u32 desc_ver; u32 txrxctl_bits; + u32 vlanctl_bits; void __iomem *base; @@ -525,6 +565,7 @@ struct fe_priv { unsigned int pkt_limit; struct timer_list oom_kick; struct timer_list nic_poll; + u32 nic_poll_irq; /* media detection workaround. * Locking: Within irq hander or disable_irq+spin_lock(&np->lock); @@ -540,6 +581,13 @@ struct fe_priv { dma_addr_t tx_dma[TX_RING]; unsigned int tx_dma_len[TX_RING]; u32 tx_flags; + + /* vlan fields */ + struct vlan_group *vlangrp; + + /* msi/msi-x fields */ + u32 msi_flags; + struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS]; }; /* @@ -567,6 +615,16 @@ static int optimization_mode = NV_OPTIMIZATION_MODE_THROUGHPUT; */ static int poll_interval = -1; +/* + * Disable MSI interrupts + */ +static int disable_msi = 0; + +/* + * Disable MSIX interrupts + */ +static int disable_msix = 0; + static inline struct fe_priv *get_nvpriv(struct net_device *dev) { return netdev_priv(dev); @@ -612,6 +670,33 @@ static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target, return 0; } +#define NV_SETUP_RX_RING 0x01 +#define NV_SETUP_TX_RING 0x02 + +static void setup_hw_rings(struct net_device *dev, int rxtx_flags) +{ + struct fe_priv *np = get_nvpriv(dev); + u8 __iomem *base = get_hwbase(dev); + + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + if (rxtx_flags & NV_SETUP_RX_RING) { + writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr); + } + if (rxtx_flags & NV_SETUP_TX_RING) { + writel((u32) cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + } + } else { + if (rxtx_flags & NV_SETUP_RX_RING) { + writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr); + writel((u32) (cpu_to_le64(np->ring_addr) >> 32), base + NvRegRxRingPhysAddrHigh); + } + if (rxtx_flags & NV_SETUP_TX_RING) { + writel((u32) cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + writel((u32) (cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)) >> 32), base + NvRegTxRingPhysAddrHigh); + } + } +} + #define MII_READ (-1) /* mii_rw: read/write a register on the PHY. * @@ -903,14 +988,27 @@ static void nv_do_rx_refill(unsigned long data) struct net_device *dev = (struct net_device *) data; struct fe_priv *np = netdev_priv(dev); - disable_irq(dev->irq); + + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + disable_irq(dev->irq); + } else { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } if (nv_alloc_rx(dev)) { spin_lock(&np->lock); if (!np->in_shutdown) mod_timer(&np->oom_kick, jiffies + OOM_REFILL); spin_unlock(&np->lock); } - enable_irq(dev->irq); + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + enable_irq(dev->irq); + } else { + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } } static void nv_init_rx(struct net_device *dev) @@ -965,7 +1063,7 @@ static int nv_release_txskb(struct net_device *dev, unsigned int skbnr) } if (np->tx_skbuff[skbnr]) { - dev_kfree_skb_irq(np->tx_skbuff[skbnr]); + dev_kfree_skb_any(np->tx_skbuff[skbnr]); np->tx_skbuff[skbnr] = NULL; return 1; } else { @@ -1031,6 +1129,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 bcnt; u32 size = skb->len-skb->data_len; u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); + u32 tx_flags_vlan = 0; /* add fragments to entries count */ for (i = 0; i < fragments; i++) { @@ -1111,10 +1210,16 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) #endif tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); + /* vlan tag */ + if (np->vlangrp && vlan_tx_tag_present(skb)) { + tx_flags_vlan = NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb); + } + /* set tx flags */ if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { np->tx_ring.orig[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra); } else { + np->tx_ring.ex[start_nr].TxVlan = cpu_to_le32(tx_flags_vlan); np->tx_ring.ex[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra); } @@ -1209,9 +1314,14 @@ static void nv_tx_timeout(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); + u32 status; + + if (np->msi_flags & NV_MSI_X_ENABLED) + status = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK; + else + status = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK; - printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name, - readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK); + printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name, status); { int i; @@ -1273,10 +1383,7 @@ static void nv_tx_timeout(struct net_device *dev) printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name); nv_drain_tx(dev); np->next_tx = np->nic_tx = 0; - if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); - else - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + setup_hw_rings(dev, NV_SETUP_TX_RING); netif_wake_queue(dev); } @@ -1342,6 +1449,8 @@ static void nv_rx_process(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u32 Flags; + u32 vlanflags = 0; + for (;;) { struct sk_buff *skb; @@ -1357,6 +1466,7 @@ static void nv_rx_process(struct net_device *dev) } else { Flags = le32_to_cpu(np->rx_ring.ex[i].FlagLen); len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver); + vlanflags = le32_to_cpu(np->rx_ring.ex[i].PacketBufferLow); } dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, Flags 0x%x.\n", @@ -1474,7 +1584,11 @@ static void nv_rx_process(struct net_device *dev) skb->protocol = eth_type_trans(skb, dev); dprintk(KERN_DEBUG "%s: nv_rx_process: packet %d with %d bytes, proto %d accepted.\n", dev->name, np->cur_rx, len, skb->protocol); - netif_rx(skb); + if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) { + vlan_hwaccel_rx(skb, np->vlangrp, vlanflags & NV_RX3_VLAN_TAG_MASK); + } else { + netif_rx(skb); + } dev->last_rx = jiffies; np->stats.rx_packets++; np->stats.rx_bytes += len; @@ -1523,7 +1637,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) * guessed, there is probably a simpler approach. * Changing the MTU is a rare event, it shouldn't matter. */ - disable_irq(dev->irq); + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + disable_irq(dev->irq); + } else { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + } spin_lock_bh(&dev->xmit_lock); spin_lock(&np->lock); /* stop engines */ @@ -1544,11 +1666,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) } /* reinit nic view of the rx queue */ writel(np->rx_buf_sz, base + NvRegOffloadConfig); - writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); - if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); - else - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING); writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), base + NvRegRingSizes); pci_push(base); @@ -1560,7 +1678,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) nv_start_tx(dev); spin_unlock(&np->lock); spin_unlock_bh(&dev->xmit_lock); - enable_irq(dev->irq); + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + enable_irq(dev->irq); + } else { + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + } } return 0; } @@ -1866,8 +1992,13 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) dprintk(KERN_DEBUG "%s: nv_nic_irq\n", dev->name); for (i=0; ; i++) { - events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK; - writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus); + if (!(np->msi_flags & NV_MSI_X_ENABLED)) { + events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK; + writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus); + } else { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK; + writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus); + } pci_push(base); dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events); if (!(events & np->irqmask)) @@ -1907,11 +2038,16 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) if (i > max_interrupt_work) { spin_lock(&np->lock); /* disable interrupts on the nic */ - writel(0, base + NvRegIrqMask); + if (!(np->msi_flags & NV_MSI_X_ENABLED)) + writel(0, base + NvRegIrqMask); + else + writel(np->irqmask, base + NvRegIrqMask); pci_push(base); - if (!np->in_shutdown) + if (!np->in_shutdown) { + np->nic_poll_irq = np->irqmask; mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq.\n", dev->name, i); spin_unlock(&np->lock); break; @@ -1923,22 +2059,212 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) return IRQ_RETVAL(i); } +static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 events; + int i; + + dprintk(KERN_DEBUG "%s: nv_nic_irq_tx\n", dev->name); + + for (i=0; ; i++) { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL; + writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus); + pci_push(base); + dprintk(KERN_DEBUG "%s: tx irq: %08x\n", dev->name, events); + if (!(events & np->irqmask)) + break; + + spin_lock(&np->lock); + nv_tx_done(dev); + spin_unlock(&np->lock); + + if (events & (NVREG_IRQ_TX_ERR)) { + dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n", + dev->name, events); + } + if (i > max_interrupt_work) { + spin_lock(&np->lock); + /* disable interrupts on the nic */ + writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask); + pci_push(base); + + if (!np->in_shutdown) { + np->nic_poll_irq |= NVREG_IRQ_TX_ALL; + mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } + printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i); + spin_unlock(&np->lock); + break; + } + + } + dprintk(KERN_DEBUG "%s: nv_nic_irq_tx completed\n", dev->name); + + return IRQ_RETVAL(i); +} + +static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 events; + int i; + + dprintk(KERN_DEBUG "%s: nv_nic_irq_rx\n", dev->name); + + for (i=0; ; i++) { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL; + writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus); + pci_push(base); + dprintk(KERN_DEBUG "%s: rx irq: %08x\n", dev->name, events); + if (!(events & np->irqmask)) + break; + + nv_rx_process(dev); + if (nv_alloc_rx(dev)) { + spin_lock(&np->lock); + if (!np->in_shutdown) + mod_timer(&np->oom_kick, jiffies + OOM_REFILL); + spin_unlock(&np->lock); + } + + if (i > max_interrupt_work) { + spin_lock(&np->lock); + /* disable interrupts on the nic */ + writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask); + pci_push(base); + + if (!np->in_shutdown) { + np->nic_poll_irq |= NVREG_IRQ_RX_ALL; + mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } + printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i); + spin_unlock(&np->lock); + break; + } + + } + dprintk(KERN_DEBUG "%s: nv_nic_irq_rx completed\n", dev->name); + + return IRQ_RETVAL(i); +} + +static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 events; + int i; + + dprintk(KERN_DEBUG "%s: nv_nic_irq_other\n", dev->name); + + for (i=0; ; i++) { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_OTHER; + writel(NVREG_IRQ_OTHER, base + NvRegMSIXIrqStatus); + pci_push(base); + dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events); + if (!(events & np->irqmask)) + break; + + if (events & NVREG_IRQ_LINK) { + spin_lock(&np->lock); + nv_link_irq(dev); + spin_unlock(&np->lock); + } + if (np->need_linktimer && time_after(jiffies, np->link_timeout)) { + spin_lock(&np->lock); + nv_linkchange(dev); + spin_unlock(&np->lock); + np->link_timeout = jiffies + LINK_TIMEOUT; + } + if (events & (NVREG_IRQ_UNKNOWN)) { + printk(KERN_DEBUG "%s: received irq with unknown events 0x%x. Please report\n", + dev->name, events); + } + if (i > max_interrupt_work) { + spin_lock(&np->lock); + /* disable interrupts on the nic */ + writel(NVREG_IRQ_OTHER, base + NvRegIrqMask); + pci_push(base); + + if (!np->in_shutdown) { + np->nic_poll_irq |= NVREG_IRQ_OTHER; + mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } + printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i); + spin_unlock(&np->lock); + break; + } + + } + dprintk(KERN_DEBUG "%s: nv_nic_irq_other completed\n", dev->name); + + return IRQ_RETVAL(i); +} + static void nv_do_nic_poll(unsigned long data) { struct net_device *dev = (struct net_device *) data; struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); + u32 mask = 0; - disable_irq(dev->irq); - /* FIXME: Do we need synchronize_irq(dev->irq) here? */ /* + * First disable irq(s) and then * reenable interrupts on the nic, we have to do this before calling * nv_nic_irq because that may decide to do otherwise */ - writel(np->irqmask, base + NvRegIrqMask); + + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + disable_irq(dev->irq); + mask = np->irqmask; + } else { + if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + mask |= NVREG_IRQ_RX_ALL; + } + if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + mask |= NVREG_IRQ_TX_ALL; + } + if (np->nic_poll_irq & NVREG_IRQ_OTHER) { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + mask |= NVREG_IRQ_OTHER; + } + } + np->nic_poll_irq = 0; + + /* FIXME: Do we need synchronize_irq(dev->irq) here? */ + + writel(mask, base + NvRegIrqMask); pci_push(base); - nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); - enable_irq(dev->irq); + + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(dev->irq); + } else { + if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { + nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } + if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { + nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + } + if (np->nic_poll_irq & NVREG_IRQ_OTHER) { + nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + } + } } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2217,11 +2543,66 @@ static struct ethtool_ops ops = { .get_perm_addr = ethtool_op_get_perm_addr, }; +static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +{ + struct fe_priv *np = get_nvpriv(dev); + + spin_lock_irq(&np->lock); + + /* save vlan group */ + np->vlangrp = grp; + + if (grp) { + /* enable vlan on MAC */ + np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS; + } else { + /* disable vlan on MAC */ + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP; + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS; + } + + writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl); + + spin_unlock_irq(&np->lock); +}; + +static void nv_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + /* nothing to do */ +}; + +static void set_msix_vector_map(struct net_device *dev, u32 vector, u32 irqmask) +{ + u8 __iomem *base = get_hwbase(dev); + int i; + u32 msixmap = 0; + + /* Each interrupt bit can be mapped to a MSIX vector (4 bits). + * MSIXMap0 represents the first 8 interrupts and MSIXMap1 represents + * the remaining 8 interrupts. + */ + for (i = 0; i < 8; i++) { + if ((irqmask >> i) & 0x1) { + msixmap |= vector << (i << 2); + } + } + writel(readl(base + NvRegMSIXMap0) | msixmap, base + NvRegMSIXMap0); + + msixmap = 0; + for (i = 0; i < 8; i++) { + if ((irqmask >> (i + 8)) & 0x1) { + msixmap |= vector << (i << 2); + } + } + writel(readl(base + NvRegMSIXMap1) | msixmap, base + NvRegMSIXMap1); +} + static int nv_open(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); - int ret, oom, i; + int ret = 1; + int oom, i; dprintk(KERN_DEBUG "nv_open: begin\n"); @@ -2253,11 +2634,7 @@ static int nv_open(struct net_device *dev) nv_copy_mac_to_hw(dev); /* 4) give hw rings */ - writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); - if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); - else - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING); writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), base + NvRegRingSizes); @@ -2265,6 +2642,7 @@ static int nv_open(struct net_device *dev) writel(np->linkspeed, base + NvRegLinkSpeed); writel(NVREG_UNKSETUP3_VAL1, base + NvRegUnknownSetupReg3); writel(np->txrxctl_bits, base + NvRegTxRxControl); + writel(np->vlanctl_bits, base + NvRegVlanControl); pci_push(base); writel(NVREG_TXRXCTL_BIT1|np->txrxctl_bits, base + NvRegTxRxControl); reg_delay(dev, NvRegUnknownSetupReg5, NVREG_UNKSETUP5_BIT31, NVREG_UNKSETUP5_BIT31, @@ -2315,9 +2693,77 @@ static int nv_open(struct net_device *dev) writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus); pci_push(base); - ret = request_irq(dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev); - if (ret) - goto out_drain; + if (np->msi_flags & NV_MSI_X_CAPABLE) { + for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) { + np->msi_x_entry[i].entry = i; + } + if ((ret = pci_enable_msix(np->pci_dev, np->msi_x_entry, (np->msi_flags & NV_MSI_X_VECTORS_MASK))) == 0) { + np->msi_flags |= NV_MSI_X_ENABLED; + if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) { + /* Request irq for rx handling */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector, &nv_nic_irq_rx, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed for rx %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + /* Request irq for tx handling */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector, &nv_nic_irq_tx, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed for tx %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + /* Request irq for link and timer handling */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector, &nv_nic_irq_other, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed for link %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + + /* map interrupts to their respective vector */ + writel(0, base + NvRegMSIXMap0); + writel(0, base + NvRegMSIXMap1); + set_msix_vector_map(dev, NV_MSI_X_VECTOR_RX, NVREG_IRQ_RX_ALL); + set_msix_vector_map(dev, NV_MSI_X_VECTOR_TX, NVREG_IRQ_TX_ALL); + set_msix_vector_map(dev, NV_MSI_X_VECTOR_OTHER, NVREG_IRQ_OTHER); + } else { + /* Request irq for all interrupts */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + + /* map interrupts to vector 0 */ + writel(0, base + NvRegMSIXMap0); + writel(0, base + NvRegMSIXMap1); + } + } + } + if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) { + if ((ret = pci_enable_msi(np->pci_dev)) == 0) { + np->msi_flags |= NV_MSI_ENABLED; + if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret); + pci_disable_msi(np->pci_dev); + np->msi_flags &= ~NV_MSI_ENABLED; + goto out_drain; + } + + /* map interrupts to vector 0 */ + writel(0, base + NvRegMSIMap0); + writel(0, base + NvRegMSIMap1); + /* enable msi vector 0 */ + writel(NVREG_MSI_VECTOR_0_ENABLED, base + NvRegMSIIrqMask); + } + } + if (ret != 0) { + if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) + goto out_drain; + } /* ask for interrupts */ writel(np->irqmask, base + NvRegIrqMask); @@ -2364,6 +2810,7 @@ static int nv_close(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base; + int i; spin_lock_irq(&np->lock); np->in_shutdown = 1; @@ -2381,13 +2828,31 @@ static int nv_close(struct net_device *dev) /* disable interrupts on the nic or we will lock up */ base = get_hwbase(dev); - writel(0, base + NvRegIrqMask); + if (np->msi_flags & NV_MSI_X_ENABLED) { + writel(np->irqmask, base + NvRegIrqMask); + } else { + if (np->msi_flags & NV_MSI_ENABLED) + writel(0, base + NvRegMSIIrqMask); + writel(0, base + NvRegIrqMask); + } pci_push(base); dprintk(KERN_INFO "%s: Irqmask is zero again\n", dev->name); spin_unlock_irq(&np->lock); - free_irq(dev->irq, dev); + if (np->msi_flags & NV_MSI_X_ENABLED) { + for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) { + free_irq(np->msi_x_entry[i].vector, dev); + } + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + } else { + free_irq(np->pci_dev->irq, dev); + if (np->msi_flags & NV_MSI_ENABLED) { + pci_disable_msi(np->pci_dev); + np->msi_flags &= ~NV_MSI_ENABLED; + } + } drain_ring(dev); @@ -2471,7 +2936,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n", pci_name(pci_dev)); } else { - dev->features |= NETIF_F_HIGHDMA; + if (pci_set_consistent_dma_mask(pci_dev, 0x0000007fffffffffULL)) { + printk(KERN_INFO "forcedeth: 64-bit DMA (consistent) failed for device %s.\n", + pci_name(pci_dev)); + goto out_relreg; + } else { + dev->features |= NETIF_F_HIGHDMA; + printk(KERN_INFO "forcedeth: using HIGHDMA\n"); + } } np->txrxctl_bits = NVREG_TXRXCTL_DESC_3; } else if (id->driver_data & DEV_HAS_LARGEDESC) { @@ -2496,6 +2968,22 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i #endif } + np->vlanctl_bits = 0; + if (id->driver_data & DEV_HAS_VLAN) { + np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; + dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->vlan_rx_register = nv_vlan_rx_register; + dev->vlan_rx_kill_vid = nv_vlan_rx_kill_vid; + } + + np->msi_flags = 0; + if ((id->driver_data & DEV_HAS_MSI) && !disable_msi) { + np->msi_flags |= NV_MSI_CAPABLE; + } + if ((id->driver_data & DEV_HAS_MSI_X) && !disable_msix) { + np->msi_flags |= NV_MSI_X_CAPABLE; + } + err = -ENOMEM; np->base = ioremap(addr, NV_PCI_REGSZ); if (!np->base) @@ -2578,10 +3066,15 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i } else { np->tx_flags = NV_TX2_VALID; } - if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) + if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) { np->irqmask = NVREG_IRQMASK_THROUGHPUT; - else + if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */ + np->msi_flags |= 0x0003; + } else { np->irqmask = NVREG_IRQMASK_CPU; + if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */ + np->msi_flags |= 0x0001; + } if (id->driver_data & DEV_NEED_TIMERIRQ) np->irqmask |= NVREG_IRQ_TIMER; @@ -2737,11 +3230,11 @@ static struct pci_device_id pci_tbl[] = { }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X, }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X, }, {0,}, }; @@ -2771,6 +3264,10 @@ module_param(optimization_mode, int, 0); MODULE_PARM_DESC(optimization_mode, "In throughput mode (0), every tx & rx packet will generate an interrupt. In CPU mode (1), interrupts are controlled by a timer."); module_param(poll_interval, int, 0); MODULE_PARM_DESC(poll_interval, "Interval determines how frequent timer interrupt is generated by [(time_in_micro_secs * 100) / (2^10)]. Min is 0 and Max is 65535."); +module_param(disable_msi, int, 0); +MODULE_PARM_DESC(disable_msi, "Disable MSI interrupts by setting to 1."); +module_param(disable_msix, int, 0); +MODULE_PARM_DESC(disable_msix, "Disable MSIX interrupts by setting to 1."); MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>"); MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver"); diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index bc9a3bf..0ea4cb4 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -427,7 +427,7 @@ that case. static void hamachi_timer(unsigned long data); enum capability_flags {CanHaveMII=1, }; -static struct chip_info { +static const struct chip_info { u16 vendor_id, device_id, device_id_mask, pad; const char *name; void (*media_timer)(unsigned long data); diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index e4188d0..9220de9 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -905,7 +905,7 @@ static int epp_open(struct net_device *dev) /* autoprobe baud rate */ tstart = jiffies; i = 0; - while ((signed)(jiffies-tstart-HZ/3) < 0) { + while (time_before(jiffies, tstart + HZ/3)) { if (pp->ops->epp_read_addr(pp, &stat, 1, 0) != 1) goto epptimeout; if ((stat & (EPP_NRAEF|EPP_NRHF)) == EPP_NRHF) { diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index 55c7ed6..247c8ca 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -115,6 +115,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/io.h> @@ -1499,7 +1500,7 @@ static int hp100_start_xmit_bm(struct sk_buff *skb, struct net_device *dev) printk("hp100: %s: start_xmit_bm: No TX PDL available.\n", dev->name); #endif /* not waited long enough since last tx? */ - if (jiffies - dev->trans_start < HZ) + if (time_before(jiffies, dev->trans_start + HZ)) return -EAGAIN; if (hp100_check_lan(dev)) @@ -1652,7 +1653,7 @@ static int hp100_start_xmit(struct sk_buff *skb, struct net_device *dev) printk("hp100: %s: start_xmit: tx free mem = 0x%x\n", dev->name, i); #endif /* not waited long enough since last failed tx try? */ - if (jiffies - dev->trans_start < HZ) { + if (time_before(jiffies, dev->trans_start + HZ)) { #ifdef HP100_DEBUG printk("hp100: %s: trans_start timing problem\n", dev->name); @@ -1718,17 +1719,10 @@ static int hp100_start_xmit(struct sk_buff *skb, struct net_device *dev) hp100_outw(i, FRAGMENT_LEN); /* and first/only fragment length */ if (lp->mode == 2) { /* memory mapped */ - if (lp->mem_ptr_virt) { /* high pci memory was remapped */ - /* Note: The J2585B needs alignment to 32bits here! */ - memcpy_toio(lp->mem_ptr_virt, skb->data, (skb->len + 3) & ~3); - if (!ok_flag) - memset_io(lp->mem_ptr_virt, 0, HP100_MIN_PACKET_SIZE - skb->len); - } else { - /* Note: The J2585B needs alignment to 32bits here! */ - isa_memcpy_toio(lp->mem_ptr_phys, skb->data, (skb->len + 3) & ~3); - if (!ok_flag) - isa_memset_io(lp->mem_ptr_phys, 0, HP100_MIN_PACKET_SIZE - skb->len); - } + /* Note: The J2585B needs alignment to 32bits here! */ + memcpy_toio(lp->mem_ptr_virt, skb->data, (skb->len + 3) & ~3); + if (!ok_flag) + memset_io(lp->mem_ptr_virt, 0, HP100_MIN_PACKET_SIZE - skb->len); } else { /* programmed i/o */ outsl(ioaddr + HP100_REG_DATA32, skb->data, (skb->len + 3) >> 2); @@ -1798,10 +1792,7 @@ static void hp100_rx(struct net_device *dev) /* First we get the header, which contains information about the */ /* actual length of the received packet. */ if (lp->mode == 2) { /* memory mapped mode */ - if (lp->mem_ptr_virt) /* if memory was remapped */ - header = readl(lp->mem_ptr_virt); - else - header = isa_readl(lp->mem_ptr_phys); + header = readl(lp->mem_ptr_virt); } else /* programmed i/o */ header = hp100_inl(DATA32); @@ -1833,13 +1824,9 @@ static void hp100_rx(struct net_device *dev) ptr = skb->data; /* Now transfer the data from the card into that area */ - if (lp->mode == 2) { - if (lp->mem_ptr_virt) - memcpy_fromio(ptr, lp->mem_ptr_virt,pkt_len); - /* Note alignment to 32bit transfers */ - else - isa_memcpy_fromio(ptr, lp->mem_ptr_phys, pkt_len); - } else /* io mapped */ + if (lp->mode == 2) + memcpy_fromio(ptr, lp->mem_ptr_virt,pkt_len); + else /* io mapped */ insl(ioaddr + HP100_REG_DATA32, ptr, pkt_len >> 2); skb->protocol = eth_type_trans(skb, dev); diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c index 591c586..7e49522 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.c +++ b/drivers/net/ibm_emac/ibm_emac_core.c @@ -204,7 +204,7 @@ static inline int emac_phy_gpcs(int phy_mode) static inline void emac_tx_enable(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; unsigned long flags; u32 r; @@ -220,7 +220,7 @@ static inline void emac_tx_enable(struct ocp_enet_private *dev) static void emac_tx_disable(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; unsigned long flags; u32 r; @@ -244,7 +244,7 @@ static void emac_tx_disable(struct ocp_enet_private *dev) static void emac_rx_enable(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; unsigned long flags; u32 r; @@ -275,7 +275,7 @@ static void emac_rx_enable(struct ocp_enet_private *dev) static void emac_rx_disable(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; unsigned long flags; u32 r; @@ -299,7 +299,7 @@ static void emac_rx_disable(struct ocp_enet_private *dev) static inline void emac_rx_disable_async(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; unsigned long flags; u32 r; @@ -315,7 +315,7 @@ static inline void emac_rx_disable_async(struct ocp_enet_private *dev) static int emac_reset(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; unsigned long flags; int n = 20; @@ -348,7 +348,7 @@ static int emac_reset(struct ocp_enet_private *dev) static void emac_hash_mc(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; u16 gaht[4] = { 0 }; struct dev_mc_list *dmi; @@ -393,7 +393,7 @@ static inline int emac_opb_mhz(void) /* BHs disabled */ static int emac_configure(struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; struct net_device *ndev = dev->ndev; int gige; u32 r; @@ -555,7 +555,7 @@ static void emac_full_tx_reset(struct net_device *ndev) static int __emac_mdio_read(struct ocp_enet_private *dev, u8 id, u8 reg) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; u32 r; int n; @@ -604,7 +604,7 @@ static int __emac_mdio_read(struct ocp_enet_private *dev, u8 id, u8 reg) static void __emac_mdio_write(struct ocp_enet_private *dev, u8 id, u8 reg, u16 val) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; int n; DBG2("%d: mdio_write(%02x,%02x,%04x)" NL, dev->def->index, id, reg, @@ -666,7 +666,7 @@ static void emac_mdio_write(struct net_device *ndev, int id, int reg, int val) static void emac_set_multicast_list(struct net_device *ndev) { struct ocp_enet_private *dev = ndev->priv; - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; u32 rmr = emac_iff2rmr(ndev); DBG("%d: multicast %08x" NL, dev->def->index, rmr); @@ -825,7 +825,7 @@ static void emac_clean_rx_ring(struct ocp_enet_private *dev) } static inline int emac_alloc_rx_skb(struct ocp_enet_private *dev, int slot, - int flags) + gfp_t flags) { struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags); if (unlikely(!skb)) @@ -1047,7 +1047,7 @@ static inline u16 emac_tx_csum(struct ocp_enet_private *dev, static inline int emac_xmit_finish(struct ocp_enet_private *dev, int len) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; struct net_device *ndev = dev->ndev; /* Send the packet out */ @@ -1519,7 +1519,7 @@ static void emac_rxde(void *param) static irqreturn_t emac_irq(int irq, void *dev_instance, struct pt_regs *regs) { struct ocp_enet_private *dev = dev_instance; - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; struct ibm_emac_error_stats *st = &dev->estats; u32 isr = in_be32(&p->isr); @@ -1619,17 +1619,17 @@ static void emac_remove(struct ocp_device *ocpdev) DBG("%d: remove" NL, dev->def->index); - ocp_set_drvdata(ocpdev, 0); + ocp_set_drvdata(ocpdev, NULL); unregister_netdev(dev->ndev); tah_fini(dev->tah_dev); rgmii_fini(dev->rgmii_dev, dev->rgmii_input); zmii_fini(dev->zmii_dev, dev->zmii_input); - emac_dbg_register(dev->def->index, 0); + emac_dbg_register(dev->def->index, NULL); mal_unregister_commac(dev->mal, &dev->commac); - iounmap((void *)dev->emacp); + iounmap(dev->emacp); kfree(dev->ndev); } @@ -2048,9 +2048,7 @@ static int __init emac_probe(struct ocp_device *ocpdev) goto out4; /* Map EMAC regs */ - dev->emacp = - (struct emac_regs *)ioremap(dev->def->paddr, - sizeof(struct emac_regs)); + dev->emacp = ioremap(dev->def->paddr, sizeof(struct emac_regs)); if (!dev->emacp) { printk(KERN_ERR "emac%d: could not ioremap device registers!\n", dev->def->index); @@ -2210,7 +2208,7 @@ static int __init emac_probe(struct ocp_device *ocpdev) return 0; out6: - iounmap((void *)dev->emacp); + iounmap(dev->emacp); out5: tah_fini(dev->tah_dev); out4: diff --git a/drivers/net/ibm_emac/ibm_emac_core.h b/drivers/net/ibm_emac/ibm_emac_core.h index 911abba..f61273b 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.h +++ b/drivers/net/ibm_emac/ibm_emac_core.h @@ -155,7 +155,7 @@ struct ibm_emac_error_stats { struct ocp_enet_private { struct net_device *ndev; /* 0 */ - struct emac_regs *emacp; + struct emac_regs __iomem *emacp; struct mal_descriptor *tx_desc; int tx_cnt; diff --git a/drivers/net/ibm_emac/ibm_emac_debug.c b/drivers/net/ibm_emac/ibm_emac_debug.c index 75d3b86..c7e1ecf 100644 --- a/drivers/net/ibm_emac/ibm_emac_debug.c +++ b/drivers/net/ibm_emac/ibm_emac_debug.c @@ -58,7 +58,7 @@ static void emac_desc_dump(int idx, struct ocp_enet_private *p) static void emac_mac_dump(int idx, struct ocp_enet_private *dev) { - struct emac_regs *p = dev->emacp; + struct emac_regs __iomem *p = dev->emacp; printk("** EMAC%d registers **\n" "MR0 = 0x%08x MR1 = 0x%08x TMR0 = 0x%08x TMR1 = 0x%08x\n" diff --git a/drivers/net/ibm_emac/ibm_emac_rgmii.h b/drivers/net/ibm_emac/ibm_emac_rgmii.h index a1ffb8a..7f03d53 100644 --- a/drivers/net/ibm_emac/ibm_emac_rgmii.h +++ b/drivers/net/ibm_emac/ibm_emac_rgmii.h @@ -31,7 +31,7 @@ struct rgmii_regs { /* RGMII device */ struct ibm_ocp_rgmii { - struct rgmii_regs *base; + struct rgmii_regs __iomem *base; int users; /* number of EMACs using this RGMII bridge */ }; diff --git a/drivers/net/ibm_emac/ibm_emac_zmii.c b/drivers/net/ibm_emac/ibm_emac_zmii.c index 35c1185..e129e0a 100644 --- a/drivers/net/ibm_emac/ibm_emac_zmii.c +++ b/drivers/net/ibm_emac/ibm_emac_zmii.c @@ -80,7 +80,7 @@ static inline u32 zmii_mode_mask(int mode, int input) static int __init zmii_init(struct ocp_device *ocpdev, int input, int *mode) { struct ibm_ocp_zmii *dev = ocp_get_drvdata(ocpdev); - struct zmii_regs *p; + struct zmii_regs __iomem *p; ZMII_DBG("%d: init(%d, %d)" NL, ocpdev->def->index, input, *mode); @@ -94,8 +94,7 @@ static int __init zmii_init(struct ocp_device *ocpdev, int input, int *mode) } dev->mode = PHY_MODE_NA; - p = (struct zmii_regs *)ioremap(ocpdev->def->paddr, - sizeof(struct zmii_regs)); + p = ioremap(ocpdev->def->paddr, sizeof(struct zmii_regs)); if (!p) { printk(KERN_ERR "zmii%d: could not ioremap device registers!\n", @@ -231,7 +230,7 @@ void __exit __zmii_fini(struct ocp_device *ocpdev, int input) if (!--dev->users) { /* Free everything if this is the last user */ ocp_set_drvdata(ocpdev, NULL); - iounmap((void *)dev->base); + iounmap(dev->base); kfree(dev); } } diff --git a/drivers/net/ibm_emac/ibm_emac_zmii.h b/drivers/net/ibm_emac/ibm_emac_zmii.h index 0bb2606..92c8544 100644 --- a/drivers/net/ibm_emac/ibm_emac_zmii.h +++ b/drivers/net/ibm_emac/ibm_emac_zmii.h @@ -32,7 +32,7 @@ struct zmii_regs { /* ZMII device */ struct ibm_ocp_zmii { - struct zmii_regs *base; + struct zmii_regs __iomem *base; int mode; /* subset of PHY_MODE_XXXX */ int users; /* number of EMACs using this ZMII bridge */ u32 fer_save; /* FER value left by firmware */ diff --git a/drivers/net/irda/Kconfig b/drivers/net/irda/Kconfig index 7a08134..c81fe1c 100644 --- a/drivers/net/irda/Kconfig +++ b/drivers/net/irda/Kconfig @@ -283,7 +283,7 @@ config USB_IRDA Say Y here if you want to build support for the USB IrDA FIR Dongle device driver. To compile it as a module, choose M here: the module will be called irda-usb. IrDA-USB support the various IrDA USB - dongles available and most of their pecularities. Those dongles + dongles available and most of their peculiarities. Those dongles plug in the USB port of your computer, are plug and play, and support SIR and FIR (4Mbps) speeds. On the other hand, those dongles tend to be less efficient than a FIR chipset. @@ -360,7 +360,7 @@ config ALI_FIR help Say Y here if you want to build support for the ALi M5123 FIR Controller. The ALi M5123 FIR Controller is embedded in ALi M1543C, - M1535, M1535D, M1535+, M1535D Sourth Bridge. This driver supports + M1535, M1535D, M1535+, M1535D South Bridge. This driver supports SIR, MIR and FIR (4Mbps) speeds. To compile it as a module, choose M here: the module will be called diff --git a/drivers/net/macsonic.c b/drivers/net/macsonic.c index 02d5c68..f6f3daf 100644 --- a/drivers/net/macsonic.c +++ b/drivers/net/macsonic.c @@ -622,7 +622,7 @@ static int __init mac_sonic_init_module(void) return 0; out_unregister: - driver_unregister(&mac_sonic_driver); + platform_driver_unregister(&mac_sonic_driver); return -ENOMEM; } diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index c0998ef..9f26613 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -10,7 +10,7 @@ * * Copyright (C) 2003 Ralf Baechle <ralf@linux-mips.org> * - * Copyright (C) 2004-2005 MontaVista Software, Inc. + * Copyright (C) 2004-2006 MontaVista Software, Inc. * Dale Farnsworth <dale@farnsworth.org> * * Copyright (C) 2004 Steven J. Hill <sjhill1@rockwellcollins.com> @@ -37,8 +37,6 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/etherdevice.h> -#include <linux/in.h> -#include <linux/ip.h> #include <linux/bitops.h> #include <linux/delay.h> @@ -52,39 +50,16 @@ #include <asm/delay.h> #include "mv643xx_eth.h" -/* - * The first part is the high level driver of the gigE ethernet ports. - */ - -/* Constants */ -#define VLAN_HLEN 4 -#define FCS_LEN 4 -#define DMA_ALIGN 8 /* hw requires 8-byte alignment */ -#define HW_IP_ALIGN 2 /* hw aligns IP header */ -#define WRAP HW_IP_ALIGN + ETH_HLEN + VLAN_HLEN + FCS_LEN -#define RX_SKB_SIZE ((dev->mtu + WRAP + 7) & ~0x7) - -#define INT_UNMASK_ALL 0x0007ffff -#define INT_UNMASK_ALL_EXT 0x0011ffff -#define INT_MASK_ALL 0x00000000 -#define INT_MASK_ALL_EXT 0x00000000 -#define INT_CAUSE_CHECK_BITS INT_CAUSE_UNMASK_ALL -#define INT_CAUSE_CHECK_BITS_EXT INT_CAUSE_UNMASK_ALL_EXT - -#ifdef MV643XX_CHECKSUM_OFFLOAD_TX -#define MAX_DESCS_PER_SKB (MAX_SKB_FRAGS + 1) -#else -#define MAX_DESCS_PER_SKB 1 -#endif - -#define PHY_WAIT_ITERATIONS 1000 /* 1000 iterations * 10uS = 10mS max */ -#define PHY_WAIT_MICRO_SECONDS 10 - /* Static function declarations */ -static int eth_port_link_is_up(unsigned int eth_port_num); static void eth_port_uc_addr_get(struct net_device *dev, unsigned char *MacAddr); static void eth_port_set_multicast_list(struct net_device *); +static void mv643xx_eth_port_enable_tx(unsigned int port_num, + unsigned int queues); +static void mv643xx_eth_port_enable_rx(unsigned int port_num, + unsigned int queues); +static unsigned int mv643xx_eth_port_disable_tx(unsigned int port_num); +static unsigned int mv643xx_eth_port_disable_rx(unsigned int port_num); static int mv643xx_eth_open(struct net_device *); static int mv643xx_eth_stop(struct net_device *); static int mv643xx_eth_change_mtu(struct net_device *, int); @@ -93,8 +68,12 @@ static void eth_port_init_mac_tables(unsigned int eth_port_num); #ifdef MV643XX_NAPI static int mv643xx_poll(struct net_device *dev, int *budget); #endif +static int ethernet_phy_get(unsigned int eth_port_num); static void ethernet_phy_set(unsigned int eth_port_num, int phy_addr); static int ethernet_phy_detect(unsigned int eth_port_num); +static int mv643xx_mdio_read(struct net_device *dev, int phy_id, int location); +static void mv643xx_mdio_write(struct net_device *dev, int phy_id, int location, int val); +static int mv643xx_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static struct ethtool_ops mv643xx_ethtool_ops; static char mv643xx_driver_name[] = "mv643xx_eth"; @@ -153,67 +132,53 @@ static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu) } /* - * mv643xx_eth_rx_task + * mv643xx_eth_rx_refill_descs * * Fills / refills RX queue on a certain gigabit ethernet port * * Input : pointer to ethernet interface network device structure * Output : N/A */ -static void mv643xx_eth_rx_task(void *data) +static void mv643xx_eth_rx_refill_descs(struct net_device *dev) { - struct net_device *dev = (struct net_device *)data; struct mv643xx_private *mp = netdev_priv(dev); struct pkt_info pkt_info; struct sk_buff *skb; int unaligned; - if (test_and_set_bit(0, &mp->rx_task_busy)) - panic("%s: Error in test_set_bit / clear_bit", dev->name); - - while (mp->rx_ring_skbs < (mp->rx_ring_size - 5)) { - skb = dev_alloc_skb(RX_SKB_SIZE + DMA_ALIGN); + while (mp->rx_desc_count < mp->rx_ring_size) { + skb = dev_alloc_skb(ETH_RX_SKB_SIZE + ETH_DMA_ALIGN); if (!skb) break; - mp->rx_ring_skbs++; - unaligned = (u32)skb->data & (DMA_ALIGN - 1); + mp->rx_desc_count++; + unaligned = (u32)skb->data & (ETH_DMA_ALIGN - 1); if (unaligned) - skb_reserve(skb, DMA_ALIGN - unaligned); + skb_reserve(skb, ETH_DMA_ALIGN - unaligned); pkt_info.cmd_sts = ETH_RX_ENABLE_INTERRUPT; - pkt_info.byte_cnt = RX_SKB_SIZE; - pkt_info.buf_ptr = dma_map_single(NULL, skb->data, RX_SKB_SIZE, - DMA_FROM_DEVICE); + pkt_info.byte_cnt = ETH_RX_SKB_SIZE; + pkt_info.buf_ptr = dma_map_single(NULL, skb->data, + ETH_RX_SKB_SIZE, DMA_FROM_DEVICE); pkt_info.return_info = skb; if (eth_rx_return_buff(mp, &pkt_info) != ETH_OK) { printk(KERN_ERR "%s: Error allocating RX Ring\n", dev->name); break; } - skb_reserve(skb, HW_IP_ALIGN); + skb_reserve(skb, ETH_HW_IP_ALIGN); } - clear_bit(0, &mp->rx_task_busy); /* * If RX ring is empty of SKB, set a timer to try allocating - * again in a later time . + * again at a later time. */ - if ((mp->rx_ring_skbs == 0) && (mp->rx_timer_flag == 0)) { + if (mp->rx_desc_count == 0) { printk(KERN_INFO "%s: Rx ring is empty\n", dev->name); - /* After 100mSec */ - mp->timeout.expires = jiffies + (HZ / 10); + mp->timeout.expires = jiffies + (HZ / 10); /* 100 mSec */ add_timer(&mp->timeout); - mp->rx_timer_flag = 1; - } -#ifdef MV643XX_RX_QUEUE_FILL_ON_TASK - else { - /* Return interrupts */ - mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(mp->port_num), - INT_UNMASK_ALL); } -#endif } /* - * mv643xx_eth_rx_task_timer_wrapper + * mv643xx_eth_rx_refill_descs_timer_wrapper * * Timer routine to wake up RX queue filling task. This function is * used only in case the RX queue is empty, and all alloc_skb has @@ -222,13 +187,9 @@ static void mv643xx_eth_rx_task(void *data) * Input : pointer to ethernet interface network device structure * Output : N/A */ -static void mv643xx_eth_rx_task_timer_wrapper(unsigned long data) +static inline void mv643xx_eth_rx_refill_descs_timer_wrapper(unsigned long data) { - struct net_device *dev = (struct net_device *)data; - struct mv643xx_private *mp = netdev_priv(dev); - - mp->rx_timer_flag = 0; - mv643xx_eth_rx_task((void *)data); + mv643xx_eth_rx_refill_descs((struct net_device *)data); } /* @@ -245,8 +206,7 @@ static void mv643xx_eth_update_mac_address(struct net_device *dev) unsigned int port_num = mp->port_num; eth_port_init_mac_tables(port_num); - memcpy(mp->port_mac_addr, dev->dev_addr, 6); - eth_port_uc_addr_set(port_num, mp->port_mac_addr); + eth_port_uc_addr_set(port_num, dev->dev_addr); } /* @@ -260,13 +220,14 @@ static void mv643xx_eth_update_mac_address(struct net_device *dev) static void mv643xx_eth_set_rx_mode(struct net_device *dev) { struct mv643xx_private *mp = netdev_priv(dev); + u32 config_reg; + config_reg = mv_read(MV643XX_ETH_PORT_CONFIG_REG(mp->port_num)); if (dev->flags & IFF_PROMISC) - mp->port_config |= (u32) MV643XX_ETH_UNICAST_PROMISCUOUS_MODE; + config_reg |= (u32) MV643XX_ETH_UNICAST_PROMISCUOUS_MODE; else - mp->port_config &= ~(u32) MV643XX_ETH_UNICAST_PROMISCUOUS_MODE; - - mv_write(MV643XX_ETH_PORT_CONFIG_REG(mp->port_num), mp->port_config); + config_reg &= ~(u32) MV643XX_ETH_UNICAST_PROMISCUOUS_MODE; + mv_write(MV643XX_ETH_PORT_CONFIG_REG(mp->port_num), config_reg); eth_port_set_multicast_list(dev); } @@ -322,53 +283,82 @@ static void mv643xx_eth_tx_timeout_task(struct net_device *dev) netif_device_detach(dev); eth_port_reset(mp->port_num); - eth_port_start(mp); + eth_port_start(dev); netif_device_attach(dev); } -/* - * mv643xx_eth_free_tx_queue - * - * Input : dev - a pointer to the required interface +/** + * mv643xx_eth_free_tx_descs - Free the tx desc data for completed descriptors * - * Output : 0 if was able to release skb , nonzero otherwise + * If force is non-zero, frees uncompleted descriptors as well */ -static int mv643xx_eth_free_tx_queue(struct net_device *dev, - unsigned int eth_int_cause_ext) +int mv643xx_eth_free_tx_descs(struct net_device *dev, int force) { struct mv643xx_private *mp = netdev_priv(dev); - struct net_device_stats *stats = &mp->stats; - struct pkt_info pkt_info; - int released = 1; + struct eth_tx_desc *desc; + u32 cmd_sts; + struct sk_buff *skb; + unsigned long flags; + int tx_index; + dma_addr_t addr; + int count; + int released = 0; + + while (mp->tx_desc_count > 0) { + spin_lock_irqsave(&mp->lock, flags); + tx_index = mp->tx_used_desc_q; + desc = &mp->p_tx_desc_area[tx_index]; + cmd_sts = desc->cmd_sts; + + if (!force && (cmd_sts & ETH_BUFFER_OWNED_BY_DMA)) { + spin_unlock_irqrestore(&mp->lock, flags); + return released; + } - if (!(eth_int_cause_ext & (BIT0 | BIT8))) - return released; + mp->tx_used_desc_q = (tx_index + 1) % mp->tx_ring_size; + mp->tx_desc_count--; - /* Check only queue 0 */ - while (eth_tx_return_desc(mp, &pkt_info) == ETH_OK) { - if (pkt_info.cmd_sts & BIT0) { + addr = desc->buf_ptr; + count = desc->byte_cnt; + skb = mp->tx_skb[tx_index]; + if (skb) + mp->tx_skb[tx_index] = NULL; + + spin_unlock_irqrestore(&mp->lock, flags); + + if (cmd_sts & ETH_ERROR_SUMMARY) { printk("%s: Error in TX\n", dev->name); - stats->tx_errors++; + mp->stats.tx_errors++; } - if (pkt_info.cmd_sts & ETH_TX_FIRST_DESC) - dma_unmap_single(NULL, pkt_info.buf_ptr, - pkt_info.byte_cnt, - DMA_TO_DEVICE); + if (cmd_sts & ETH_TX_FIRST_DESC) + dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE); else - dma_unmap_page(NULL, pkt_info.buf_ptr, - pkt_info.byte_cnt, - DMA_TO_DEVICE); + dma_unmap_page(NULL, addr, count, DMA_TO_DEVICE); - if (pkt_info.return_info) { - dev_kfree_skb_irq(pkt_info.return_info); - released = 0; - } + if (skb) + dev_kfree_skb_irq(skb); + + released = 1; } return released; } +static void mv643xx_eth_free_completed_tx_descs(struct net_device *dev) +{ + struct mv643xx_private *mp = netdev_priv(dev); + + if (mv643xx_eth_free_tx_descs(dev, 0) && + mp->tx_ring_size - mp->tx_desc_count >= MAX_DESCS_PER_SKB) + netif_wake_queue(dev); +} + +static void mv643xx_eth_free_all_tx_descs(struct net_device *dev) +{ + mv643xx_eth_free_tx_descs(dev, 1); +} + /* * mv643xx_eth_receive * @@ -380,11 +370,7 @@ static int mv643xx_eth_free_tx_queue(struct net_device *dev, * * Output : number of served packets */ -#ifdef MV643XX_NAPI static int mv643xx_eth_receive_queue(struct net_device *dev, int budget) -#else -static int mv643xx_eth_receive_queue(struct net_device *dev) -#endif { struct mv643xx_private *mp = netdev_priv(dev); struct net_device_stats *stats = &mp->stats; @@ -392,15 +378,14 @@ static int mv643xx_eth_receive_queue(struct net_device *dev) struct sk_buff *skb; struct pkt_info pkt_info; -#ifdef MV643XX_NAPI while (budget-- > 0 && eth_port_receive(mp, &pkt_info) == ETH_OK) { -#else - while (eth_port_receive(mp, &pkt_info) == ETH_OK) { -#endif - mp->rx_ring_skbs--; + mp->rx_desc_count--; received_packets++; - /* Update statistics. Note byte count includes 4 byte CRC count */ + /* + * Update statistics. + * Note byte count includes 4 byte CRC count + */ stats->rx_packets++; stats->rx_bytes += pkt_info.byte_cnt; skb = pkt_info.return_info; @@ -448,10 +433,61 @@ static int mv643xx_eth_receive_queue(struct net_device *dev) } dev->last_rx = jiffies; } + mv643xx_eth_rx_refill_descs(dev); /* Fill RX ring with skb's */ return received_packets; } +/* Set the mv643xx port configuration register for the speed/duplex mode. */ +static void mv643xx_eth_update_pscr(struct net_device *dev, + struct ethtool_cmd *ecmd) +{ + struct mv643xx_private *mp = netdev_priv(dev); + int port_num = mp->port_num; + u32 o_pscr, n_pscr; + unsigned int queues; + + o_pscr = mv_read(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num)); + n_pscr = o_pscr; + + /* clear speed, duplex and rx buffer size fields */ + n_pscr &= ~(MV643XX_ETH_SET_MII_SPEED_TO_100 | + MV643XX_ETH_SET_GMII_SPEED_TO_1000 | + MV643XX_ETH_SET_FULL_DUPLEX_MODE | + MV643XX_ETH_MAX_RX_PACKET_MASK); + + if (ecmd->duplex == DUPLEX_FULL) + n_pscr |= MV643XX_ETH_SET_FULL_DUPLEX_MODE; + + if (ecmd->speed == SPEED_1000) + n_pscr |= MV643XX_ETH_SET_GMII_SPEED_TO_1000 | + MV643XX_ETH_MAX_RX_PACKET_9700BYTE; + else { + if (ecmd->speed == SPEED_100) + n_pscr |= MV643XX_ETH_SET_MII_SPEED_TO_100; + n_pscr |= MV643XX_ETH_MAX_RX_PACKET_1522BYTE; + } + + if (n_pscr != o_pscr) { + if ((o_pscr & MV643XX_ETH_SERIAL_PORT_ENABLE) == 0) + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), + n_pscr); + else { + queues = mv643xx_eth_port_disable_tx(port_num); + + o_pscr &= ~MV643XX_ETH_SERIAL_PORT_ENABLE; + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), + o_pscr); + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), + n_pscr); + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), + n_pscr); + if (queues) + mv643xx_eth_port_enable_tx(port_num, queues); + } + } +} + /* * mv643xx_eth_int_handler * @@ -473,78 +509,52 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id, /* Read interrupt cause registers */ eth_int_cause = mv_read(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num)) & - INT_UNMASK_ALL; - - if (eth_int_cause & BIT1) + ETH_INT_UNMASK_ALL; + if (eth_int_cause & ETH_INT_CAUSE_EXT) { eth_int_cause_ext = mv_read( MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num)) & - INT_UNMASK_ALL_EXT; + ETH_INT_UNMASK_ALL_EXT; + mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num), + ~eth_int_cause_ext); + } -#ifdef MV643XX_NAPI - if (!(eth_int_cause & 0x0007fffd)) { - /* Dont ack the Rx interrupt */ -#endif - /* - * Clear specific ethernet port intrerrupt registers by - * acknowleding relevant bits. - */ - mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), - ~eth_int_cause); - if (eth_int_cause_ext != 0x0) - mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG - (port_num), ~eth_int_cause_ext); - - /* UDP change : We may need this */ - if ((eth_int_cause_ext & 0x0000ffff) && - (mv643xx_eth_free_tx_queue(dev, eth_int_cause_ext) == 0) && - (mp->tx_ring_size > mp->tx_ring_skbs + MAX_DESCS_PER_SKB)) - netif_wake_queue(dev); -#ifdef MV643XX_NAPI - } else { - if (netif_rx_schedule_prep(dev)) { - /* Mask all the interrupts */ - mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), - INT_MASK_ALL); - /* wait for previous write to complete */ - mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); - __netif_rx_schedule(dev); + /* PHY status changed */ + if (eth_int_cause_ext & ETH_INT_CAUSE_PHY) { + struct ethtool_cmd cmd; + + if (mii_link_ok(&mp->mii)) { + mii_ethtool_gset(&mp->mii, &cmd); + mv643xx_eth_update_pscr(dev, &cmd); + mv643xx_eth_port_enable_tx(port_num, + ETH_TX_QUEUES_ENABLED); + if (!netif_carrier_ok(dev)) { + netif_carrier_on(dev); + if (mp->tx_ring_size - mp->tx_desc_count >= + MAX_DESCS_PER_SKB) + netif_wake_queue(dev); + } + } else if (netif_carrier_ok(dev)) { + netif_stop_queue(dev); + netif_carrier_off(dev); } -#else - if (eth_int_cause & (BIT2 | BIT11)) - mv643xx_eth_receive_queue(dev, 0); + } - /* - * After forwarded received packets to upper layer, add a task - * in an interrupts enabled context that refills the RX ring - * with skb's. - */ -#ifdef MV643XX_RX_QUEUE_FILL_ON_TASK - /* Mask all interrupts on ethernet port */ +#ifdef MV643XX_NAPI + if (eth_int_cause & ETH_INT_CAUSE_RX) { + /* schedule the NAPI poll routine to maintain port */ mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), - INT_MASK_ALL); - /* wait for previous write to take effect */ + ETH_INT_MASK_ALL); + /* wait for previous write to complete */ mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); - queue_task(&mp->rx_task, &tq_immediate); - mark_bh(IMMEDIATE_BH); + netif_rx_schedule(dev); + } #else - mp->rx_task.func(dev); + if (eth_int_cause & ETH_INT_CAUSE_RX) + mv643xx_eth_receive_queue(dev, INT_MAX); + if (eth_int_cause_ext & ETH_INT_CAUSE_TX) + mv643xx_eth_free_completed_tx_descs(dev); #endif -#endif - } - /* PHY status changed */ - if (eth_int_cause_ext & (BIT16 | BIT20)) { - if (eth_port_link_is_up(port_num)) { - netif_carrier_on(dev); - netif_wake_queue(dev); - /* Start TX queue */ - mv_write(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG - (port_num), 1); - } else { - netif_carrier_off(dev); - netif_stop_queue(dev); - } - } /* * If no real interrupt occured, exit. @@ -670,9 +680,6 @@ static void ether_init_rx_desc_ring(struct mv643xx_private *mp) mp->rx_used_desc_q = 0; mp->rx_desc_area_size = rx_desc_num * sizeof(struct eth_rx_desc); - - /* Add the queue to the list of RX queues of this port */ - mp->port_rx_queue_command |= 1; } /* @@ -712,14 +719,36 @@ static void ether_init_tx_desc_ring(struct mv643xx_private *mp) mp->tx_curr_desc_q = 0; mp->tx_used_desc_q = 0; -#ifdef MV643XX_CHECKSUM_OFFLOAD_TX - mp->tx_first_desc_q = 0; -#endif mp->tx_desc_area_size = tx_desc_num * sizeof(struct eth_tx_desc); +} - /* Add the queue to the list of Tx queues of this port */ - mp->port_tx_queue_command |= 1; +static int mv643xx_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct mv643xx_private *mp = netdev_priv(dev); + int err; + + spin_lock_irq(&mp->lock); + err = mii_ethtool_sset(&mp->mii, cmd); + spin_unlock_irq(&mp->lock); + + return err; +} + +static int mv643xx_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct mv643xx_private *mp = netdev_priv(dev); + int err; + + spin_lock_irq(&mp->lock); + err = mii_ethtool_gset(&mp->mii, cmd); + spin_unlock_irq(&mp->lock); + + /* The PHY may support 1000baseT_Half, but the mv643xx does not */ + cmd->supported &= ~SUPPORTED_1000baseT_Half; + cmd->advertising &= ~ADVERTISED_1000baseT_Half; + + return err; } /* @@ -750,23 +779,12 @@ static int mv643xx_eth_open(struct net_device *dev) return -EAGAIN; } - /* Stop RX Queues */ - mv_write(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num), 0x0000ff00); - - /* Set the MAC Address */ - memcpy(mp->port_mac_addr, dev->dev_addr, 6); - eth_port_init(mp); - INIT_WORK(&mp->rx_task, (void (*)(void *))mv643xx_eth_rx_task, dev); - memset(&mp->timeout, 0, sizeof(struct timer_list)); - mp->timeout.function = mv643xx_eth_rx_task_timer_wrapper; + mp->timeout.function = mv643xx_eth_rx_refill_descs_timer_wrapper; mp->timeout.data = (unsigned long)dev; - mp->rx_task_busy = 0; - mp->rx_timer_flag = 0; - /* Allocate RX and TX skb rings */ mp->rx_skb = kmalloc(sizeof(*mp->rx_skb) * mp->rx_ring_size, GFP_KERNEL); @@ -784,7 +802,7 @@ static int mv643xx_eth_open(struct net_device *dev) } /* Allocate TX ring */ - mp->tx_ring_skbs = 0; + mp->tx_desc_count = 0; size = mp->tx_ring_size * sizeof(struct eth_tx_desc); mp->tx_desc_area_size = size; @@ -809,7 +827,7 @@ static int mv643xx_eth_open(struct net_device *dev) ether_init_tx_desc_ring(mp); /* Allocate RX ring */ - mp->rx_ring_skbs = 0; + mp->rx_desc_count = 0; size = mp->rx_ring_size * sizeof(struct eth_rx_desc); mp->rx_desc_area_size = size; @@ -839,9 +857,13 @@ static int mv643xx_eth_open(struct net_device *dev) ether_init_rx_desc_ring(mp); - mv643xx_eth_rx_task(dev); /* Fill RX ring with skb's */ + mv643xx_eth_rx_refill_descs(dev); /* Fill RX ring with skb's */ + + /* Clear any pending ethernet port interrupts */ + mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0); + mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num), 0); - eth_port_start(mp); + eth_port_start(dev); /* Interrupt Coalescing */ @@ -853,16 +875,13 @@ static int mv643xx_eth_open(struct net_device *dev) mp->tx_int_coal = eth_port_set_tx_coal(port_num, 133000000, MV643XX_TX_COAL); - /* Clear any pending ethernet port interrupts */ - mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0); - mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num), 0); - /* Unmask phy and link status changes interrupts */ mv_write(MV643XX_ETH_INTERRUPT_EXTEND_MASK_REG(port_num), - INT_UNMASK_ALL_EXT); + ETH_INT_UNMASK_ALL_EXT); /* Unmask RX buffer and TX end interrupt */ - mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), INT_UNMASK_ALL); + mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), ETH_INT_UNMASK_ALL); + return 0; out_free_tx_skb: @@ -878,25 +897,14 @@ out_free_irq: static void mv643xx_eth_free_tx_rings(struct net_device *dev) { struct mv643xx_private *mp = netdev_priv(dev); - unsigned int port_num = mp->port_num; - unsigned int curr; - struct sk_buff *skb; /* Stop Tx Queues */ - mv_write(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num), 0x0000ff00); - - /* Free outstanding skb's on TX rings */ - for (curr = 0; mp->tx_ring_skbs && curr < mp->tx_ring_size; curr++) { - skb = mp->tx_skb[curr]; - if (skb) { - mp->tx_ring_skbs -= skb_shinfo(skb)->nr_frags; - dev_kfree_skb(skb); - mp->tx_ring_skbs--; - } - } - if (mp->tx_ring_skbs) - printk("%s: Error on Tx descriptor free - could not free %d" - " descriptors\n", dev->name, mp->tx_ring_skbs); + mv643xx_eth_port_disable_tx(mp->port_num); + + /* Free outstanding skb's on TX ring */ + mv643xx_eth_free_all_tx_descs(dev); + + BUG_ON(mp->tx_used_desc_q != mp->tx_curr_desc_q); /* Free TX ring */ if (mp->tx_sram_size) @@ -913,21 +921,21 @@ static void mv643xx_eth_free_rx_rings(struct net_device *dev) int curr; /* Stop RX Queues */ - mv_write(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num), 0x0000ff00); + mv643xx_eth_port_disable_rx(port_num); /* Free preallocated skb's on RX rings */ - for (curr = 0; mp->rx_ring_skbs && curr < mp->rx_ring_size; curr++) { + for (curr = 0; mp->rx_desc_count && curr < mp->rx_ring_size; curr++) { if (mp->rx_skb[curr]) { dev_kfree_skb(mp->rx_skb[curr]); - mp->rx_ring_skbs--; + mp->rx_desc_count--; } } - if (mp->rx_ring_skbs) + if (mp->rx_desc_count) printk(KERN_ERR "%s: Error in freeing Rx Ring. %d skb's still" " stuck in RX Ring - ignoring them\n", dev->name, - mp->rx_ring_skbs); + mp->rx_desc_count); /* Free RX ring */ if (mp->rx_sram_size) iounmap(mp->p_rx_desc_area); @@ -952,7 +960,7 @@ static int mv643xx_eth_stop(struct net_device *dev) unsigned int port_num = mp->port_num; /* Mask all interrupts on ethernet port */ - mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), INT_MASK_ALL); + mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), ETH_INT_MASK_ALL); /* wait for previous write to complete */ mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); @@ -977,30 +985,6 @@ static int mv643xx_eth_stop(struct net_device *dev) } #ifdef MV643XX_NAPI -static void mv643xx_tx(struct net_device *dev) -{ - struct mv643xx_private *mp = netdev_priv(dev); - struct pkt_info pkt_info; - - while (eth_tx_return_desc(mp, &pkt_info) == ETH_OK) { - if (pkt_info.cmd_sts & ETH_TX_FIRST_DESC) - dma_unmap_single(NULL, pkt_info.buf_ptr, - pkt_info.byte_cnt, - DMA_TO_DEVICE); - else - dma_unmap_page(NULL, pkt_info.buf_ptr, - pkt_info.byte_cnt, - DMA_TO_DEVICE); - - if (pkt_info.return_info) - dev_kfree_skb_irq(pkt_info.return_info); - } - - if (netif_queue_stopped(dev) && - mp->tx_ring_size > mp->tx_ring_skbs + MAX_DESCS_PER_SKB) - netif_wake_queue(dev); -} - /* * mv643xx_poll * @@ -1014,7 +998,7 @@ static int mv643xx_poll(struct net_device *dev, int *budget) #ifdef MV643XX_TX_FAST_REFILL if (++mp->tx_clean_threshold > 5) { - mv643xx_tx(dev); + mv643xx_eth_free_completed_tx_descs(dev); mp->tx_clean_threshold = 0; } #endif @@ -1025,7 +1009,6 @@ static int mv643xx_poll(struct net_device *dev, int *budget) if (orig_budget > dev->quota) orig_budget = dev->quota; work_done = mv643xx_eth_receive_queue(dev, orig_budget); - mp->rx_task.func(dev); *budget -= work_done; dev->quota -= work_done; if (work_done >= orig_budget) @@ -1037,14 +1020,17 @@ static int mv643xx_poll(struct net_device *dev, int *budget) mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0); mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num), 0); mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), - INT_UNMASK_ALL); + ETH_INT_UNMASK_ALL); } return done ? 0 : 1; } #endif -/* Hardware can't handle unaligned fragments smaller than 9 bytes. +/** + * has_tiny_unaligned_frags - check if skb has any small, unaligned fragments + * + * Hardware can't handle unaligned fragments smaller than 9 bytes. * This helper function detects that case. */ @@ -1061,223 +1047,166 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb) return 0; } +/** + * eth_alloc_tx_desc_index - return the index of the next available tx desc + */ +static int eth_alloc_tx_desc_index(struct mv643xx_private *mp) +{ + int tx_desc_curr; -/* - * mv643xx_eth_start_xmit - * - * This function is queues a packet in the Tx descriptor for - * required port. - * - * Input : skb - a pointer to socket buffer - * dev - a pointer to the required port + BUG_ON(mp->tx_desc_count >= mp->tx_ring_size); + + tx_desc_curr = mp->tx_curr_desc_q; + mp->tx_curr_desc_q = (tx_desc_curr + 1) % mp->tx_ring_size; + + BUG_ON(mp->tx_curr_desc_q == mp->tx_used_desc_q); + + return tx_desc_curr; +} + +/** + * eth_tx_fill_frag_descs - fill tx hw descriptors for an skb's fragments. * - * Output : zero upon success + * Ensure the data for each fragment to be transmitted is mapped properly, + * then fill in descriptors in the tx hw queue. */ -static int mv643xx_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) +static void eth_tx_fill_frag_descs(struct mv643xx_private *mp, + struct sk_buff *skb) { - struct mv643xx_private *mp = netdev_priv(dev); - struct net_device_stats *stats = &mp->stats; - ETH_FUNC_RET_STATUS status; - unsigned long flags; - struct pkt_info pkt_info; + int frag; + int tx_index; + struct eth_tx_desc *desc; - if (netif_queue_stopped(dev)) { - printk(KERN_ERR - "%s: Tried sending packet when interface is stopped\n", - dev->name); - return 1; + for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { + skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; + + tx_index = eth_alloc_tx_desc_index(mp); + desc = &mp->p_tx_desc_area[tx_index]; + + desc->cmd_sts = ETH_BUFFER_OWNED_BY_DMA; + /* Last Frag enables interrupt and frees the skb */ + if (frag == (skb_shinfo(skb)->nr_frags - 1)) { + desc->cmd_sts |= ETH_ZERO_PADDING | + ETH_TX_LAST_DESC | + ETH_TX_ENABLE_INTERRUPT; + mp->tx_skb[tx_index] = skb; + } else + mp->tx_skb[tx_index] = 0; + + desc = &mp->p_tx_desc_area[tx_index]; + desc->l4i_chk = 0; + desc->byte_cnt = this_frag->size; + desc->buf_ptr = dma_map_page(NULL, this_frag->page, + this_frag->page_offset, + this_frag->size, + DMA_TO_DEVICE); } +} - /* This is a hard error, log it. */ - if ((mp->tx_ring_size - mp->tx_ring_skbs) <= - (skb_shinfo(skb)->nr_frags + 1)) { - netif_stop_queue(dev); - printk(KERN_ERR - "%s: Bug in mv643xx_eth - Trying to transmit when" - " queue full !\n", dev->name); - return 1; - } +/** + * eth_tx_submit_descs_for_skb - submit data from an skb to the tx hw + * + * Ensure the data for an skb to be transmitted is mapped properly, + * then fill in descriptors in the tx hw queue and start the hardware. + */ +static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, + struct sk_buff *skb) +{ + int tx_index; + struct eth_tx_desc *desc; + u32 cmd_sts; + int length; + int nr_frags = skb_shinfo(skb)->nr_frags; - /* Paranoid check - this shouldn't happen */ - if (skb == NULL) { - stats->tx_dropped++; - printk(KERN_ERR "mv64320_eth paranoid check failed\n"); - return 1; - } + cmd_sts = ETH_TX_FIRST_DESC | ETH_GEN_CRC | ETH_BUFFER_OWNED_BY_DMA; -#ifdef MV643XX_CHECKSUM_OFFLOAD_TX - if (has_tiny_unaligned_frags(skb)) { - if ((skb_linearize(skb, GFP_ATOMIC) != 0)) { - stats->tx_dropped++; - printk(KERN_DEBUG "%s: failed to linearize tiny " - "unaligned fragment\n", dev->name); - return 1; - } - } + tx_index = eth_alloc_tx_desc_index(mp); + desc = &mp->p_tx_desc_area[tx_index]; - spin_lock_irqsave(&mp->lock, flags); + if (nr_frags) { + eth_tx_fill_frag_descs(mp, skb); - if (!skb_shinfo(skb)->nr_frags) { - if (skb->ip_summed != CHECKSUM_HW) { - /* Errata BTS #50, IHL must be 5 if no HW checksum */ - pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT | - ETH_TX_FIRST_DESC | - ETH_TX_LAST_DESC | - 5 << ETH_TX_IHL_SHIFT; - pkt_info.l4i_chk = 0; - } else { - pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT | - ETH_TX_FIRST_DESC | - ETH_TX_LAST_DESC | - ETH_GEN_TCP_UDP_CHECKSUM | - ETH_GEN_IP_V_4_CHECKSUM | - skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; - /* CPU already calculated pseudo header checksum. */ - if ((skb->protocol == ETH_P_IP) && - (skb->nh.iph->protocol == IPPROTO_UDP) ) { - pkt_info.cmd_sts |= ETH_UDP_FRAME; - pkt_info.l4i_chk = skb->h.uh->check; - } else if ((skb->protocol == ETH_P_IP) && - (skb->nh.iph->protocol == IPPROTO_TCP)) - pkt_info.l4i_chk = skb->h.th->check; - else { - printk(KERN_ERR - "%s: chksum proto != IPv4 TCP or UDP\n", - dev->name); - spin_unlock_irqrestore(&mp->lock, flags); - return 1; - } - } - pkt_info.byte_cnt = skb->len; - pkt_info.buf_ptr = dma_map_single(NULL, skb->data, skb->len, - DMA_TO_DEVICE); - pkt_info.return_info = skb; - status = eth_port_send(mp, &pkt_info); - if ((status == ETH_ERROR) || (status == ETH_QUEUE_FULL)) - printk(KERN_ERR "%s: Error on transmitting packet\n", - dev->name); - stats->tx_bytes += pkt_info.byte_cnt; + length = skb_headlen(skb); + mp->tx_skb[tx_index] = 0; } else { - unsigned int frag; + cmd_sts |= ETH_ZERO_PADDING | + ETH_TX_LAST_DESC | + ETH_TX_ENABLE_INTERRUPT; + length = skb->len; + mp->tx_skb[tx_index] = skb; + } - /* first frag which is skb header */ - pkt_info.byte_cnt = skb_headlen(skb); - pkt_info.buf_ptr = dma_map_single(NULL, skb->data, - skb_headlen(skb), - DMA_TO_DEVICE); - pkt_info.l4i_chk = 0; - pkt_info.return_info = 0; - - if (skb->ip_summed != CHECKSUM_HW) - /* Errata BTS #50, IHL must be 5 if no HW checksum */ - pkt_info.cmd_sts = ETH_TX_FIRST_DESC | - 5 << ETH_TX_IHL_SHIFT; - else { - pkt_info.cmd_sts = ETH_TX_FIRST_DESC | - ETH_GEN_TCP_UDP_CHECKSUM | - ETH_GEN_IP_V_4_CHECKSUM | - skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; - /* CPU already calculated pseudo header checksum. */ - if ((skb->protocol == ETH_P_IP) && - (skb->nh.iph->protocol == IPPROTO_UDP)) { - pkt_info.cmd_sts |= ETH_UDP_FRAME; - pkt_info.l4i_chk = skb->h.uh->check; - } else if ((skb->protocol == ETH_P_IP) && - (skb->nh.iph->protocol == IPPROTO_TCP)) - pkt_info.l4i_chk = skb->h.th->check; - else { - printk(KERN_ERR - "%s: chksum proto != IPv4 TCP or UDP\n", - dev->name); - spin_unlock_irqrestore(&mp->lock, flags); - return 1; - } - } + desc->byte_cnt = length; + desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); - status = eth_port_send(mp, &pkt_info); - if (status != ETH_OK) { - if ((status == ETH_ERROR)) - printk(KERN_ERR - "%s: Error on transmitting packet\n", - dev->name); - if (status == ETH_QUEUE_FULL) - printk("Error on Queue Full \n"); - if (status == ETH_QUEUE_LAST_RESOURCE) - printk("Tx resource error \n"); + if (skb->ip_summed == CHECKSUM_HW) { + BUG_ON(skb->protocol != ETH_P_IP); + + cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | + ETH_GEN_IP_V_4_CHECKSUM | + skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; + + switch (skb->nh.iph->protocol) { + case IPPROTO_UDP: + cmd_sts |= ETH_UDP_FRAME; + desc->l4i_chk = skb->h.uh->check; + break; + case IPPROTO_TCP: + desc->l4i_chk = skb->h.th->check; + break; + default: + BUG(); } - stats->tx_bytes += pkt_info.byte_cnt; - - /* Check for the remaining frags */ - for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { - skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; - pkt_info.l4i_chk = 0x0000; - pkt_info.cmd_sts = 0x00000000; - - /* Last Frag enables interrupt and frees the skb */ - if (frag == (skb_shinfo(skb)->nr_frags - 1)) { - pkt_info.cmd_sts |= ETH_TX_ENABLE_INTERRUPT | - ETH_TX_LAST_DESC; - pkt_info.return_info = skb; - } else { - pkt_info.return_info = 0; - } - pkt_info.l4i_chk = 0; - pkt_info.byte_cnt = this_frag->size; + } else { + /* Errata BTS #50, IHL must be 5 if no HW checksum */ + cmd_sts |= 5 << ETH_TX_IHL_SHIFT; + desc->l4i_chk = 0; + } + + /* ensure all other descriptors are written before first cmd_sts */ + wmb(); + desc->cmd_sts = cmd_sts; - pkt_info.buf_ptr = dma_map_page(NULL, this_frag->page, - this_frag->page_offset, - this_frag->size, - DMA_TO_DEVICE); + /* ensure all descriptors are written before poking hardware */ + wmb(); + mv643xx_eth_port_enable_tx(mp->port_num, ETH_TX_QUEUES_ENABLED); - status = eth_port_send(mp, &pkt_info); + mp->tx_desc_count += nr_frags + 1; +} - if (status != ETH_OK) { - if ((status == ETH_ERROR)) - printk(KERN_ERR "%s: Error on " - "transmitting packet\n", - dev->name); +/** + * mv643xx_eth_start_xmit - queue an skb to the hardware for transmission + * + */ +static int mv643xx_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mv643xx_private *mp = netdev_priv(dev); + struct net_device_stats *stats = &mp->stats; + unsigned long flags; - if (status == ETH_QUEUE_LAST_RESOURCE) - printk("Tx resource error \n"); + BUG_ON(netif_queue_stopped(dev)); + BUG_ON(skb == NULL); + BUG_ON(mp->tx_ring_size - mp->tx_desc_count < MAX_DESCS_PER_SKB); - if (status == ETH_QUEUE_FULL) - printk("Queue is full \n"); - } - stats->tx_bytes += pkt_info.byte_cnt; + if (has_tiny_unaligned_frags(skb)) { + if ((skb_linearize(skb, GFP_ATOMIC) != 0)) { + stats->tx_dropped++; + printk(KERN_DEBUG "%s: failed to linearize tiny " + "unaligned fragment\n", dev->name); + return 1; } } -#else - spin_lock_irqsave(&mp->lock, flags); - pkt_info.cmd_sts = ETH_TX_ENABLE_INTERRUPT | ETH_TX_FIRST_DESC | - ETH_TX_LAST_DESC; - pkt_info.l4i_chk = 0; - pkt_info.byte_cnt = skb->len; - pkt_info.buf_ptr = dma_map_single(NULL, skb->data, skb->len, - DMA_TO_DEVICE); - pkt_info.return_info = skb; - status = eth_port_send(mp, &pkt_info); - if ((status == ETH_ERROR) || (status == ETH_QUEUE_FULL)) - printk(KERN_ERR "%s: Error on transmitting packet\n", - dev->name); - stats->tx_bytes += pkt_info.byte_cnt; -#endif - - /* Check if TX queue can handle another skb. If not, then - * signal higher layers to stop requesting TX - */ - if (mp->tx_ring_size <= (mp->tx_ring_skbs + MAX_DESCS_PER_SKB)) - /* - * Stop getting skb's from upper layers. - * Getting skb's from upper layers will be enabled again after - * packets are released. - */ - netif_stop_queue(dev); + spin_lock_irqsave(&mp->lock, flags); - /* Update statistics and start of transmittion time */ + eth_tx_submit_descs_for_skb(mp, skb); + stats->tx_bytes = skb->len; stats->tx_packets++; dev->trans_start = jiffies; + if (mp->tx_ring_size - mp->tx_desc_count < MAX_DESCS_PER_SKB) + netif_stop_queue(dev); + spin_unlock_irqrestore(&mp->lock, flags); return 0; /* success */ @@ -1306,16 +1235,45 @@ static void mv643xx_netpoll(struct net_device *netdev) struct mv643xx_private *mp = netdev_priv(netdev); int port_num = mp->port_num; - mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), INT_MASK_ALL); + mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), ETH_INT_MASK_ALL); /* wait for previous write to complete */ mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); mv643xx_eth_int_handler(netdev->irq, netdev, NULL); - mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), INT_UNMASK_ALL); + mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), ETH_INT_UNMASK_ALL); } #endif +static void mv643xx_init_ethtool_cmd(struct net_device *dev, int phy_address, + int speed, int duplex, + struct ethtool_cmd *cmd) +{ + struct mv643xx_private *mp = netdev_priv(dev); + + memset(cmd, 0, sizeof(*cmd)); + + cmd->port = PORT_MII; + cmd->transceiver = XCVR_INTERNAL; + cmd->phy_address = phy_address; + + if (speed == 0) { + cmd->autoneg = AUTONEG_ENABLE; + /* mii lib checks, but doesn't use speed on AUTONEG_ENABLE */ + cmd->speed = SPEED_100; + cmd->advertising = ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full; + if (mp->mii.supports_gmii) + cmd->advertising |= ADVERTISED_1000baseT_Full; + } else { + cmd->autoneg = AUTONEG_DISABLE; + cmd->speed = speed; + cmd->duplex = duplex; + } +} + /*/ * mv643xx_eth_probe * @@ -1336,6 +1294,9 @@ static int mv643xx_eth_probe(struct platform_device *pdev) u8 *p; struct resource *res; int err; + struct ethtool_cmd cmd; + int duplex = DUPLEX_HALF; + int speed = 0; /* default to auto-negotiation */ dev = alloc_etherdev(sizeof(struct mv643xx_private)); if (!dev) @@ -1373,6 +1334,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) dev->tx_queue_len = mp->tx_ring_size; dev->base_addr = 0; dev->change_mtu = mv643xx_eth_change_mtu; + dev->do_ioctl = mv643xx_eth_do_ioctl; SET_ETHTOOL_OPS(dev, &mv643xx_ethtool_ops); #ifdef MV643XX_CHECKSUM_OFFLOAD_TX @@ -1393,33 +1355,17 @@ static int mv643xx_eth_probe(struct platform_device *pdev) /* set default config values */ eth_port_uc_addr_get(dev, dev->dev_addr); - mp->port_config = MV643XX_ETH_PORT_CONFIG_DEFAULT_VALUE; - mp->port_config_extend = MV643XX_ETH_PORT_CONFIG_EXTEND_DEFAULT_VALUE; - mp->port_sdma_config = MV643XX_ETH_PORT_SDMA_CONFIG_DEFAULT_VALUE; - mp->port_serial_control = MV643XX_ETH_PORT_SERIAL_CONTROL_DEFAULT_VALUE; mp->rx_ring_size = MV643XX_ETH_PORT_DEFAULT_RECEIVE_QUEUE_SIZE; mp->tx_ring_size = MV643XX_ETH_PORT_DEFAULT_TRANSMIT_QUEUE_SIZE; pd = pdev->dev.platform_data; if (pd) { - if (pd->mac_addr != NULL) + if (pd->mac_addr) memcpy(dev->dev_addr, pd->mac_addr, 6); if (pd->phy_addr || pd->force_phy_addr) ethernet_phy_set(port_num, pd->phy_addr); - if (pd->port_config || pd->force_port_config) - mp->port_config = pd->port_config; - - if (pd->port_config_extend || pd->force_port_config_extend) - mp->port_config_extend = pd->port_config_extend; - - if (pd->port_sdma_config || pd->force_port_sdma_config) - mp->port_sdma_config = pd->port_sdma_config; - - if (pd->port_serial_control || pd->force_port_serial_control) - mp->port_serial_control = pd->port_serial_control; - if (pd->rx_queue_size) mp->rx_ring_size = pd->rx_queue_size; @@ -1435,16 +1381,33 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mp->rx_sram_size = pd->rx_sram_size; mp->rx_sram_addr = pd->rx_sram_addr; } + + duplex = pd->duplex; + speed = pd->speed; } + /* Hook up MII support for ethtool */ + mp->mii.dev = dev; + mp->mii.mdio_read = mv643xx_mdio_read; + mp->mii.mdio_write = mv643xx_mdio_write; + mp->mii.phy_id = ethernet_phy_get(port_num); + mp->mii.phy_id_mask = 0x3f; + mp->mii.reg_num_mask = 0x1f; + err = ethernet_phy_detect(port_num); if (err) { pr_debug("MV643xx ethernet port %d: " "No PHY detected at addr %d\n", port_num, ethernet_phy_get(port_num)); - return err; + goto out; } + ethernet_phy_reset(port_num); + mp->mii.supports_gmii = mii_check_gmii_support(&mp->mii); + mv643xx_init_ethtool_cmd(dev, mp->mii.phy_id, speed, duplex, &cmd); + mv643xx_eth_update_pscr(dev, &cmd); + mv643xx_set_settings(dev, &cmd); + err = register_netdev(dev); if (err) goto out; @@ -1689,26 +1652,9 @@ MODULE_DESCRIPTION("Ethernet driver for Marvell MV643XX"); * to the Rx descriptor ring to enable the reuse of this source. * Return Rx resource is done using the eth_rx_return_buff API. * - * Transmit operation: - * The eth_port_send API supports Scatter-Gather which enables to - * send a packet spanned over multiple buffers. This means that - * for each packet info structure given by the user and put into - * the Tx descriptors ring, will be transmitted only if the 'LAST' - * bit will be set in the packet info command status field. This - * API also consider restriction regarding buffer alignments and - * sizes. - * The user must return a Tx resource after ensuring the buffer - * has been transmitted to enable the Tx ring indexes to update. - * - * BOARD LAYOUT - * This device is on-board. No jumper diagram is necessary. - * - * EXTERNAL INTERFACE - * * Prior to calling the initialization routine eth_port_init() the user * must set the following fields under mv643xx_private struct: * port_num User Ethernet port number. - * port_mac_addr[6] User defined port MAC address. * port_config User port configuration value. * port_config_extend User port config extend value. * port_sdma_config User port SDMA config value. @@ -1725,20 +1671,12 @@ MODULE_DESCRIPTION("Ethernet driver for Marvell MV643XX"); * return_info Tx/Rx user resource return information. */ -/* defines */ -/* SDMA command macros */ -#define ETH_ENABLE_TX_QUEUE(eth_port) \ - mv_write(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(eth_port), 1) - -/* locals */ - /* PHY routines */ static int ethernet_phy_get(unsigned int eth_port_num); static void ethernet_phy_set(unsigned int eth_port_num, int phy_addr); /* Ethernet Port routines */ -static int eth_port_uc_addr(unsigned int eth_port_num, unsigned char uc_nibble, - int option); +static void eth_port_set_filter_table_entry(int table, unsigned char entry); /* * eth_port_init - Initialize the Ethernet port driver @@ -1766,17 +1704,11 @@ static int eth_port_uc_addr(unsigned int eth_port_num, unsigned char uc_nibble, */ static void eth_port_init(struct mv643xx_private *mp) { - mp->port_rx_queue_command = 0; - mp->port_tx_queue_command = 0; - mp->rx_resource_err = 0; - mp->tx_resource_err = 0; eth_port_reset(mp->port_num); eth_port_init_mac_tables(mp->port_num); - - ethernet_phy_reset(mp->port_num); } /* @@ -1798,7 +1730,7 @@ static void eth_port_init(struct mv643xx_private *mp) * and ether_init_rx_desc_ring for Rx queues). * * INPUT: - * struct mv643xx_private *mp Ethernet port control struct + * dev - a pointer to the required interface * * OUTPUT: * Ethernet port is ready to receive and transmit. @@ -1806,10 +1738,13 @@ static void eth_port_init(struct mv643xx_private *mp) * RETURN: * None. */ -static void eth_port_start(struct mv643xx_private *mp) +static void eth_port_start(struct net_device *dev) { + struct mv643xx_private *mp = netdev_priv(dev); unsigned int port_num = mp->port_num; int tx_curr_desc, rx_curr_desc; + u32 pscr; + struct ethtool_cmd ethtool_cmd; /* Assignment of Tx CTRP of given queue */ tx_curr_desc = mp->tx_curr_desc_q; @@ -1822,37 +1757,45 @@ static void eth_port_start(struct mv643xx_private *mp) (u32)((struct eth_rx_desc *)mp->rx_desc_dma + rx_curr_desc)); /* Add the assigned Ethernet address to the port's address table */ - eth_port_uc_addr_set(port_num, mp->port_mac_addr); + eth_port_uc_addr_set(port_num, dev->dev_addr); /* Assign port configuration and command. */ - mv_write(MV643XX_ETH_PORT_CONFIG_REG(port_num), mp->port_config); + mv_write(MV643XX_ETH_PORT_CONFIG_REG(port_num), + MV643XX_ETH_PORT_CONFIG_DEFAULT_VALUE); mv_write(MV643XX_ETH_PORT_CONFIG_EXTEND_REG(port_num), - mp->port_config_extend); + MV643XX_ETH_PORT_CONFIG_EXTEND_DEFAULT_VALUE); + pscr = mv_read(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num)); - /* Increase the Rx side buffer size if supporting GigE */ - if (mp->port_serial_control & MV643XX_ETH_SET_GMII_SPEED_TO_1000) - mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), - (mp->port_serial_control & 0xfff1ffff) | (0x5 << 17)); - else - mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), - mp->port_serial_control); + pscr &= ~(MV643XX_ETH_SERIAL_PORT_ENABLE | MV643XX_ETH_FORCE_LINK_PASS); + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), pscr); + + pscr |= MV643XX_ETH_DISABLE_AUTO_NEG_FOR_FLOW_CTRL | + MV643XX_ETH_DISABLE_AUTO_NEG_SPEED_GMII | + MV643XX_ETH_DISABLE_AUTO_NEG_FOR_DUPLX | + MV643XX_ETH_DO_NOT_FORCE_LINK_FAIL | + MV643XX_ETH_SERIAL_PORT_CONTROL_RESERVED; - mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), - mv_read(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num)) | - MV643XX_ETH_SERIAL_PORT_ENABLE); + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), pscr); + + pscr |= MV643XX_ETH_SERIAL_PORT_ENABLE; + mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), pscr); /* Assign port SDMA configuration */ mv_write(MV643XX_ETH_SDMA_CONFIG_REG(port_num), - mp->port_sdma_config); + MV643XX_ETH_PORT_SDMA_CONFIG_DEFAULT_VALUE); /* Enable port Rx. */ - mv_write(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num), - mp->port_rx_queue_command); + mv643xx_eth_port_enable_rx(port_num, ETH_RX_QUEUES_ENABLED); /* Disable port bandwidth limits by clearing MTU register */ mv_write(MV643XX_ETH_MAXIMUM_TRANSMIT_UNIT(port_num), 0); + + /* save phy settings across reset */ + mv643xx_get_settings(dev, ðtool_cmd); + ethernet_phy_reset(mp->port_num); + mv643xx_set_settings(dev, ðtool_cmd); } /* @@ -1866,8 +1809,9 @@ static void eth_port_start(struct mv643xx_private *mp) * char * p_addr Address to be set * * OUTPUT: - * Set MAC address low and high registers. also calls eth_port_uc_addr() - * To set the unicast table with the proper information. + * Set MAC address low and high registers. also calls + * eth_port_set_filter_table_entry() to set the unicast + * table with the proper information. * * RETURN: * N/A. @@ -1878,6 +1822,7 @@ static void eth_port_uc_addr_set(unsigned int eth_port_num, { unsigned int mac_h; unsigned int mac_l; + int table; mac_l = (p_addr[4] << 8) | (p_addr[5]); mac_h = (p_addr[0] << 24) | (p_addr[1] << 16) | (p_addr[2] << 8) | @@ -1887,9 +1832,8 @@ static void eth_port_uc_addr_set(unsigned int eth_port_num, mv_write(MV643XX_ETH_MAC_ADDR_HIGH(eth_port_num), mac_h); /* Accept frames of this address */ - eth_port_uc_addr(eth_port_num, p_addr[5], ACCEPT_MAC_ADDR); - - return; + table = MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE(eth_port_num); + eth_port_set_filter_table_entry(table, p_addr[5] & 0x0f); } /* @@ -1928,72 +1872,6 @@ static void eth_port_uc_addr_get(struct net_device *dev, unsigned char *p_addr) } /* - * eth_port_uc_addr - This function Set the port unicast address table - * - * DESCRIPTION: - * This function locates the proper entry in the Unicast table for the - * specified MAC nibble and sets its properties according to function - * parameters. - * - * INPUT: - * unsigned int eth_port_num Port number. - * unsigned char uc_nibble Unicast MAC Address last nibble. - * int option 0 = Add, 1 = remove address. - * - * OUTPUT: - * This function add/removes MAC addresses from the port unicast address - * table. - * - * RETURN: - * true is output succeeded. - * false if option parameter is invalid. - * - */ -static int eth_port_uc_addr(unsigned int eth_port_num, unsigned char uc_nibble, - int option) -{ - unsigned int unicast_reg; - unsigned int tbl_offset; - unsigned int reg_offset; - - /* Locate the Unicast table entry */ - uc_nibble = (0xf & uc_nibble); - tbl_offset = (uc_nibble / 4) * 4; /* Register offset from unicast table base */ - reg_offset = uc_nibble % 4; /* Entry offset within the above register */ - - switch (option) { - case REJECT_MAC_ADDR: - /* Clear accepts frame bit at given unicast DA table entry */ - unicast_reg = mv_read((MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE - (eth_port_num) + tbl_offset)); - - unicast_reg &= (0x0E << (8 * reg_offset)); - - mv_write((MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE - (eth_port_num) + tbl_offset), unicast_reg); - break; - - case ACCEPT_MAC_ADDR: - /* Set accepts frame bit at unicast DA filter table entry */ - unicast_reg = - mv_read((MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE - (eth_port_num) + tbl_offset)); - - unicast_reg |= (0x01 << (8 * reg_offset)); - - mv_write((MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE - (eth_port_num) + tbl_offset), unicast_reg); - - break; - - default: - return 0; - } - - return 1; -} - -/* * The entries in each table are indexed by a hash of a packet's MAC * address. One bit in each entry determines whether the packet is * accepted. There are 4 entries (each 8 bits wide) in each register @@ -2205,8 +2083,8 @@ static void eth_port_init_mac_tables(unsigned int eth_port_num) /* Clear DA filter unicast table (Ex_dFUT) */ for (table_index = 0; table_index <= 0xC; table_index += 4) - mv_write((MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE - (eth_port_num) + table_index), 0); + mv_write(MV643XX_ETH_DA_FILTER_UNICAST_TABLE_BASE + (eth_port_num) + table_index, 0); for (table_index = 0; table_index <= 0xFC; table_index += 4) { /* Clear DA filter special multicast table (Ex_dFSMT) */ @@ -2389,6 +2267,73 @@ static void ethernet_phy_reset(unsigned int eth_port_num) eth_port_read_smi_reg(eth_port_num, 0, &phy_reg_data); phy_reg_data |= 0x8000; /* Set bit 15 to reset the PHY */ eth_port_write_smi_reg(eth_port_num, 0, phy_reg_data); + + /* wait for PHY to come out of reset */ + do { + udelay(1); + eth_port_read_smi_reg(eth_port_num, 0, &phy_reg_data); + } while (phy_reg_data & 0x8000); +} + +static void mv643xx_eth_port_enable_tx(unsigned int port_num, + unsigned int queues) +{ + mv_write(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num), queues); +} + +static void mv643xx_eth_port_enable_rx(unsigned int port_num, + unsigned int queues) +{ + mv_write(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num), queues); +} + +static unsigned int mv643xx_eth_port_disable_tx(unsigned int port_num) +{ + u32 queues; + + /* Stop Tx port activity. Check port Tx activity. */ + queues = mv_read(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num)) + & 0xFF; + if (queues) { + /* Issue stop command for active queues only */ + mv_write(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num), + (queues << 8)); + + /* Wait for all Tx activity to terminate. */ + /* Check port cause register that all Tx queues are stopped */ + while (mv_read(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num)) + & 0xFF) + udelay(PHY_WAIT_MICRO_SECONDS); + + /* Wait for Tx FIFO to empty */ + while (mv_read(MV643XX_ETH_PORT_STATUS_REG(port_num)) & + ETH_PORT_TX_FIFO_EMPTY) + udelay(PHY_WAIT_MICRO_SECONDS); + } + + return queues; +} + +static unsigned int mv643xx_eth_port_disable_rx(unsigned int port_num) +{ + u32 queues; + + /* Stop Rx port activity. Check port Rx activity. */ + queues = mv_read(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num)) + & 0xFF; + if (queues) { + /* Issue stop command for active queues only */ + mv_write(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num), + (queues << 8)); + + /* Wait for all Rx activity to terminate. */ + /* Check port cause register that all Rx queues are stopped */ + while (mv_read(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num)) + & 0xFF) + udelay(PHY_WAIT_MICRO_SECONDS); + } + + return queues; } /* @@ -2413,70 +2358,21 @@ static void eth_port_reset(unsigned int port_num) { unsigned int reg_data; - /* Stop Tx port activity. Check port Tx activity. */ - reg_data = mv_read(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num)); - - if (reg_data & 0xFF) { - /* Issue stop command for active channels only */ - mv_write(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num), - (reg_data << 8)); - - /* Wait for all Tx activity to terminate. */ - /* Check port cause register that all Tx queues are stopped */ - while (mv_read(MV643XX_ETH_TRANSMIT_QUEUE_COMMAND_REG(port_num)) - & 0xFF) - udelay(10); - } - - /* Stop Rx port activity. Check port Rx activity. */ - reg_data = mv_read(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num)); - - if (reg_data & 0xFF) { - /* Issue stop command for active channels only */ - mv_write(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num), - (reg_data << 8)); - - /* Wait for all Rx activity to terminate. */ - /* Check port cause register that all Rx queues are stopped */ - while (mv_read(MV643XX_ETH_RECEIVE_QUEUE_COMMAND_REG(port_num)) - & 0xFF) - udelay(10); - } + mv643xx_eth_port_disable_tx(port_num); + mv643xx_eth_port_disable_rx(port_num); /* Clear all MIB counters */ eth_clear_mib_counters(port_num); /* Reset the Enable bit in the Configuration Register */ reg_data = mv_read(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num)); - reg_data &= ~MV643XX_ETH_SERIAL_PORT_ENABLE; + reg_data &= ~(MV643XX_ETH_SERIAL_PORT_ENABLE | + MV643XX_ETH_DO_NOT_FORCE_LINK_FAIL | + MV643XX_ETH_FORCE_LINK_PASS); mv_write(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num), reg_data); } -static int eth_port_autoneg_supported(unsigned int eth_port_num) -{ - unsigned int phy_reg_data0; - - eth_port_read_smi_reg(eth_port_num, 0, &phy_reg_data0); - - return phy_reg_data0 & 0x1000; -} - -static int eth_port_link_is_up(unsigned int eth_port_num) -{ - unsigned int phy_reg_data1; - - eth_port_read_smi_reg(eth_port_num, 1, &phy_reg_data1); - - if (eth_port_autoneg_supported(eth_port_num)) { - if (phy_reg_data1 & 0x20) /* auto-neg complete */ - return 1; - } else if (phy_reg_data1 & 0x4) /* link up */ - return 1; - - return 0; -} - /* * eth_port_read_smi_reg - Read PHY registers * @@ -2582,250 +2478,21 @@ out: } /* - * eth_port_send - Send an Ethernet packet - * - * DESCRIPTION: - * This routine send a given packet described by p_pktinfo parameter. It - * supports transmitting of a packet spaned over multiple buffers. The - * routine updates 'curr' and 'first' indexes according to the packet - * segment passed to the routine. In case the packet segment is first, - * the 'first' index is update. In any case, the 'curr' index is updated. - * If the routine get into Tx resource error it assigns 'curr' index as - * 'first'. This way the function can abort Tx process of multiple - * descriptors per packet. - * - * INPUT: - * struct mv643xx_private *mp Ethernet Port Control srtuct. - * struct pkt_info *p_pkt_info User packet buffer. - * - * OUTPUT: - * Tx ring 'curr' and 'first' indexes are updated. - * - * RETURN: - * ETH_QUEUE_FULL in case of Tx resource error. - * ETH_ERROR in case the routine can not access Tx desc ring. - * ETH_QUEUE_LAST_RESOURCE if the routine uses the last Tx resource. - * ETH_OK otherwise. - * - */ -#ifdef MV643XX_CHECKSUM_OFFLOAD_TX -/* - * Modified to include the first descriptor pointer in case of SG + * Wrappers for MII support library. */ -static ETH_FUNC_RET_STATUS eth_port_send(struct mv643xx_private *mp, - struct pkt_info *p_pkt_info) -{ - int tx_desc_curr, tx_desc_used, tx_first_desc, tx_next_desc; - struct eth_tx_desc *current_descriptor; - struct eth_tx_desc *first_descriptor; - u32 command; - - /* Do not process Tx ring in case of Tx ring resource error */ - if (mp->tx_resource_err) - return ETH_QUEUE_FULL; - - /* - * The hardware requires that each buffer that is <= 8 bytes - * in length must be aligned on an 8 byte boundary. - */ - if (p_pkt_info->byte_cnt <= 8 && p_pkt_info->buf_ptr & 0x7) { - printk(KERN_ERR - "mv643xx_eth port %d: packet size <= 8 problem\n", - mp->port_num); - return ETH_ERROR; - } - - mp->tx_ring_skbs++; - BUG_ON(mp->tx_ring_skbs > mp->tx_ring_size); - - /* Get the Tx Desc ring indexes */ - tx_desc_curr = mp->tx_curr_desc_q; - tx_desc_used = mp->tx_used_desc_q; - - current_descriptor = &mp->p_tx_desc_area[tx_desc_curr]; - - tx_next_desc = (tx_desc_curr + 1) % mp->tx_ring_size; - - current_descriptor->buf_ptr = p_pkt_info->buf_ptr; - current_descriptor->byte_cnt = p_pkt_info->byte_cnt; - current_descriptor->l4i_chk = p_pkt_info->l4i_chk; - mp->tx_skb[tx_desc_curr] = p_pkt_info->return_info; - - command = p_pkt_info->cmd_sts | ETH_ZERO_PADDING | ETH_GEN_CRC | - ETH_BUFFER_OWNED_BY_DMA; - if (command & ETH_TX_FIRST_DESC) { - tx_first_desc = tx_desc_curr; - mp->tx_first_desc_q = tx_first_desc; - first_descriptor = current_descriptor; - mp->tx_first_command = command; - } else { - tx_first_desc = mp->tx_first_desc_q; - first_descriptor = &mp->p_tx_desc_area[tx_first_desc]; - BUG_ON(first_descriptor == NULL); - current_descriptor->cmd_sts = command; - } - - if (command & ETH_TX_LAST_DESC) { - wmb(); - first_descriptor->cmd_sts = mp->tx_first_command; - - wmb(); - ETH_ENABLE_TX_QUEUE(mp->port_num); - - /* - * Finish Tx packet. Update first desc in case of Tx resource - * error */ - tx_first_desc = tx_next_desc; - mp->tx_first_desc_q = tx_first_desc; - } - - /* Check for ring index overlap in the Tx desc ring */ - if (tx_next_desc == tx_desc_used) { - mp->tx_resource_err = 1; - mp->tx_curr_desc_q = tx_first_desc; - - return ETH_QUEUE_LAST_RESOURCE; - } - - mp->tx_curr_desc_q = tx_next_desc; - - return ETH_OK; -} -#else -static ETH_FUNC_RET_STATUS eth_port_send(struct mv643xx_private *mp, - struct pkt_info *p_pkt_info) +static int mv643xx_mdio_read(struct net_device *dev, int phy_id, int location) { - int tx_desc_curr; - int tx_desc_used; - struct eth_tx_desc *current_descriptor; - unsigned int command_status; - - /* Do not process Tx ring in case of Tx ring resource error */ - if (mp->tx_resource_err) - return ETH_QUEUE_FULL; - - mp->tx_ring_skbs++; - BUG_ON(mp->tx_ring_skbs > mp->tx_ring_size); - - /* Get the Tx Desc ring indexes */ - tx_desc_curr = mp->tx_curr_desc_q; - tx_desc_used = mp->tx_used_desc_q; - current_descriptor = &mp->p_tx_desc_area[tx_desc_curr]; - - command_status = p_pkt_info->cmd_sts | ETH_ZERO_PADDING | ETH_GEN_CRC; - current_descriptor->buf_ptr = p_pkt_info->buf_ptr; - current_descriptor->byte_cnt = p_pkt_info->byte_cnt; - mp->tx_skb[tx_desc_curr] = p_pkt_info->return_info; - - /* Set last desc with DMA ownership and interrupt enable. */ - wmb(); - current_descriptor->cmd_sts = command_status | - ETH_BUFFER_OWNED_BY_DMA | ETH_TX_ENABLE_INTERRUPT; - - wmb(); - ETH_ENABLE_TX_QUEUE(mp->port_num); - - /* Finish Tx packet. Update first desc in case of Tx resource error */ - tx_desc_curr = (tx_desc_curr + 1) % mp->tx_ring_size; - - /* Update the current descriptor */ - mp->tx_curr_desc_q = tx_desc_curr; - - /* Check for ring index overlap in the Tx desc ring */ - if (tx_desc_curr == tx_desc_used) { - mp->tx_resource_err = 1; - return ETH_QUEUE_LAST_RESOURCE; - } + int val; + struct mv643xx_private *mp = netdev_priv(dev); - return ETH_OK; + eth_port_read_smi_reg(mp->port_num, location, &val); + return val; } -#endif -/* - * eth_tx_return_desc - Free all used Tx descriptors - * - * DESCRIPTION: - * This routine returns the transmitted packet information to the caller. - * It uses the 'first' index to support Tx desc return in case a transmit - * of a packet spanned over multiple buffer still in process. - * In case the Tx queue was in "resource error" condition, where there are - * no available Tx resources, the function resets the resource error flag. - * - * INPUT: - * struct mv643xx_private *mp Ethernet Port Control srtuct. - * struct pkt_info *p_pkt_info User packet buffer. - * - * OUTPUT: - * Tx ring 'first' and 'used' indexes are updated. - * - * RETURN: - * ETH_OK on success - * ETH_ERROR otherwise. - * - */ -static ETH_FUNC_RET_STATUS eth_tx_return_desc(struct mv643xx_private *mp, - struct pkt_info *p_pkt_info) +static void mv643xx_mdio_write(struct net_device *dev, int phy_id, int location, int val) { - int tx_desc_used; - int tx_busy_desc; - struct eth_tx_desc *p_tx_desc_used; - unsigned int command_status; - unsigned long flags; - int err = ETH_OK; - - spin_lock_irqsave(&mp->lock, flags); - -#ifdef MV643XX_CHECKSUM_OFFLOAD_TX - tx_busy_desc = mp->tx_first_desc_q; -#else - tx_busy_desc = mp->tx_curr_desc_q; -#endif - - /* Get the Tx Desc ring indexes */ - tx_desc_used = mp->tx_used_desc_q; - - p_tx_desc_used = &mp->p_tx_desc_area[tx_desc_used]; - - /* Sanity check */ - if (p_tx_desc_used == NULL) { - err = ETH_ERROR; - goto out; - } - - /* Stop release. About to overlap the current available Tx descriptor */ - if (tx_desc_used == tx_busy_desc && !mp->tx_resource_err) { - err = ETH_ERROR; - goto out; - } - - command_status = p_tx_desc_used->cmd_sts; - - /* Still transmitting... */ - if (command_status & (ETH_BUFFER_OWNED_BY_DMA)) { - err = ETH_ERROR; - goto out; - } - - /* Pass the packet information to the caller */ - p_pkt_info->cmd_sts = command_status; - p_pkt_info->return_info = mp->tx_skb[tx_desc_used]; - p_pkt_info->buf_ptr = p_tx_desc_used->buf_ptr; - p_pkt_info->byte_cnt = p_tx_desc_used->byte_cnt; - mp->tx_skb[tx_desc_used] = NULL; - - /* Update the next descriptor to release. */ - mp->tx_used_desc_q = (tx_desc_used + 1) % mp->tx_ring_size; - - /* Any Tx return cancels the Tx resource error status */ - mp->tx_resource_err = 0; - - BUG_ON(mp->tx_ring_skbs == 0); - mp->tx_ring_skbs--; - -out: - spin_unlock_irqrestore(&mp->lock, flags); - - return err; + struct mv643xx_private *mp = netdev_priv(dev); + eth_port_write_smi_reg(mp->port_num, location, val); } /* @@ -3017,111 +2684,6 @@ static const struct mv643xx_stats mv643xx_gstrings_stats[] = { #define MV643XX_STATS_LEN \ sizeof(mv643xx_gstrings_stats) / sizeof(struct mv643xx_stats) -static int -mv643xx_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) -{ - struct mv643xx_private *mp = netdev->priv; - int port_num = mp->port_num; - int autoneg = eth_port_autoneg_supported(port_num); - int mode_10_bit; - int auto_duplex; - int half_duplex = 0; - int full_duplex = 0; - int auto_speed; - int speed_10 = 0; - int speed_100 = 0; - int speed_1000 = 0; - - u32 pcs = mv_read(MV643XX_ETH_PORT_SERIAL_CONTROL_REG(port_num)); - u32 psr = mv_read(MV643XX_ETH_PORT_STATUS_REG(port_num)); - - mode_10_bit = psr & MV643XX_ETH_PORT_STATUS_MODE_10_BIT; - - if (mode_10_bit) { - ecmd->supported = SUPPORTED_10baseT_Half; - } else { - ecmd->supported = (SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Full | - (autoneg ? SUPPORTED_Autoneg : 0) | - SUPPORTED_TP); - - auto_duplex = !(pcs & MV643XX_ETH_DISABLE_AUTO_NEG_FOR_DUPLX); - auto_speed = !(pcs & MV643XX_ETH_DISABLE_AUTO_NEG_SPEED_GMII); - - ecmd->advertising = ADVERTISED_TP; - - if (autoneg) { - ecmd->advertising |= ADVERTISED_Autoneg; - - if (auto_duplex) { - half_duplex = 1; - full_duplex = 1; - } else { - if (pcs & MV643XX_ETH_SET_FULL_DUPLEX_MODE) - full_duplex = 1; - else - half_duplex = 1; - } - - if (auto_speed) { - speed_10 = 1; - speed_100 = 1; - speed_1000 = 1; - } else { - if (pcs & MV643XX_ETH_SET_GMII_SPEED_TO_1000) - speed_1000 = 1; - else if (pcs & MV643XX_ETH_SET_MII_SPEED_TO_100) - speed_100 = 1; - else - speed_10 = 1; - } - - if (speed_10 & half_duplex) - ecmd->advertising |= ADVERTISED_10baseT_Half; - if (speed_10 & full_duplex) - ecmd->advertising |= ADVERTISED_10baseT_Full; - if (speed_100 & half_duplex) - ecmd->advertising |= ADVERTISED_100baseT_Half; - if (speed_100 & full_duplex) - ecmd->advertising |= ADVERTISED_100baseT_Full; - if (speed_1000) - ecmd->advertising |= ADVERTISED_1000baseT_Full; - } - } - - ecmd->port = PORT_TP; - ecmd->phy_address = ethernet_phy_get(port_num); - - ecmd->transceiver = XCVR_EXTERNAL; - - if (netif_carrier_ok(netdev)) { - if (mode_10_bit) - ecmd->speed = SPEED_10; - else { - if (psr & MV643XX_ETH_PORT_STATUS_GMII_1000) - ecmd->speed = SPEED_1000; - else if (psr & MV643XX_ETH_PORT_STATUS_MII_100) - ecmd->speed = SPEED_100; - else - ecmd->speed = SPEED_10; - } - - if (psr & MV643XX_ETH_PORT_STATUS_FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; - else - ecmd->duplex = DUPLEX_HALF; - } else { - ecmd->speed = -1; - ecmd->duplex = -1; - } - - ecmd->autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; - return 0; -} - static void mv643xx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { @@ -3168,15 +2730,41 @@ static void mv643xx_get_strings(struct net_device *netdev, uint32_t stringset, } } +static u32 mv643xx_eth_get_link(struct net_device *dev) +{ + struct mv643xx_private *mp = netdev_priv(dev); + + return mii_link_ok(&mp->mii); +} + +static int mv643xx_eth_nway_restart(struct net_device *dev) +{ + struct mv643xx_private *mp = netdev_priv(dev); + + return mii_nway_restart(&mp->mii); +} + +static int mv643xx_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mv643xx_private *mp = netdev_priv(dev); + + return generic_mii_ioctl(&mp->mii, if_mii(ifr), cmd, NULL); +} + static struct ethtool_ops mv643xx_ethtool_ops = { .get_settings = mv643xx_get_settings, + .set_settings = mv643xx_set_settings, .get_drvinfo = mv643xx_get_drvinfo, - .get_link = ethtool_op_get_link, + .get_link = mv643xx_eth_get_link, .get_sg = ethtool_op_get_sg, .set_sg = ethtool_op_set_sg, .get_strings = mv643xx_get_strings, .get_stats_count = mv643xx_get_stats_count, .get_ethtool_stats = mv643xx_get_ethtool_stats, + .get_strings = mv643xx_get_strings, + .get_stats_count = mv643xx_get_stats_count, + .get_ethtool_stats = mv643xx_get_ethtool_stats, + .nway_reset = mv643xx_eth_nway_restart, }; /************* End ethtool support *************************/ diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h index f769f9b..7754d19 100644 --- a/drivers/net/mv643xx_eth.h +++ b/drivers/net/mv643xx_eth.h @@ -5,53 +5,16 @@ #include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <linux/mii.h> #include <linux/mv643xx.h> -#define BIT0 0x00000001 -#define BIT1 0x00000002 -#define BIT2 0x00000004 -#define BIT3 0x00000008 -#define BIT4 0x00000010 -#define BIT5 0x00000020 -#define BIT6 0x00000040 -#define BIT7 0x00000080 -#define BIT8 0x00000100 -#define BIT9 0x00000200 -#define BIT10 0x00000400 -#define BIT11 0x00000800 -#define BIT12 0x00001000 -#define BIT13 0x00002000 -#define BIT14 0x00004000 -#define BIT15 0x00008000 -#define BIT16 0x00010000 -#define BIT17 0x00020000 -#define BIT18 0x00040000 -#define BIT19 0x00080000 -#define BIT20 0x00100000 -#define BIT21 0x00200000 -#define BIT22 0x00400000 -#define BIT23 0x00800000 -#define BIT24 0x01000000 -#define BIT25 0x02000000 -#define BIT26 0x04000000 -#define BIT27 0x08000000 -#define BIT28 0x10000000 -#define BIT29 0x20000000 -#define BIT30 0x40000000 -#define BIT31 0x80000000 - -/* - * The first part is the high level driver of the gigE ethernet ports. - */ - /* Checksum offload for Tx works for most packets, but * fails if previous packet sent did not use hw csum */ #define MV643XX_CHECKSUM_OFFLOAD_TX #define MV643XX_NAPI #define MV643XX_TX_FAST_REFILL -#undef MV643XX_RX_QUEUE_FILL_ON_TASK /* Does not work, yet */ #undef MV643XX_COAL /* @@ -73,25 +36,40 @@ #define MV643XX_RX_COAL 100 #endif -/* - * The second part is the low level driver of the gigE ethernet ports. - */ +#ifdef MV643XX_CHECKSUM_OFFLOAD_TX +#define MAX_DESCS_PER_SKB (MAX_SKB_FRAGS + 1) +#else +#define MAX_DESCS_PER_SKB 1 +#endif -/* - * Header File for : MV-643xx network interface header - * - * DESCRIPTION: - * This header file contains macros typedefs and function declaration for - * the Marvell Gig Bit Ethernet Controller. - * - * DEPENDENCIES: - * None. - * - */ +#define ETH_VLAN_HLEN 4 +#define ETH_FCS_LEN 4 +#define ETH_DMA_ALIGN 8 /* hw requires 8-byte alignment */ +#define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ +#define ETH_WRAPPER_LEN (ETH_HW_IP_ALIGN + ETH_HLEN + \ + ETH_VLAN_HLEN + ETH_FCS_LEN) +#define ETH_RX_SKB_SIZE ((dev->mtu + ETH_WRAPPER_LEN + 7) & ~0x7) + +#define ETH_RX_QUEUES_ENABLED (1 << 0) /* use only Q0 for receive */ +#define ETH_TX_QUEUES_ENABLED (1 << 0) /* use only Q0 for transmit */ + +#define ETH_INT_CAUSE_RX_DONE (ETH_RX_QUEUES_ENABLED << 2) +#define ETH_INT_CAUSE_RX_ERROR (ETH_RX_QUEUES_ENABLED << 9) +#define ETH_INT_CAUSE_RX (ETH_INT_CAUSE_RX_DONE | ETH_INT_CAUSE_RX_ERROR) +#define ETH_INT_CAUSE_EXT 0x00000002 +#define ETH_INT_UNMASK_ALL (ETH_INT_CAUSE_RX | ETH_INT_CAUSE_EXT) -/* MAC accepet/reject macros */ -#define ACCEPT_MAC_ADDR 0 -#define REJECT_MAC_ADDR 1 +#define ETH_INT_CAUSE_TX_DONE (ETH_TX_QUEUES_ENABLED << 0) +#define ETH_INT_CAUSE_TX_ERROR (ETH_TX_QUEUES_ENABLED << 8) +#define ETH_INT_CAUSE_TX (ETH_INT_CAUSE_TX_DONE | ETH_INT_CAUSE_TX_ERROR) +#define ETH_INT_CAUSE_PHY 0x00010000 +#define ETH_INT_UNMASK_ALL_EXT (ETH_INT_CAUSE_TX | ETH_INT_CAUSE_PHY) + +#define ETH_INT_MASK_ALL 0x00000000 +#define ETH_INT_MASK_ALL_EXT 0x00000000 + +#define PHY_WAIT_ITERATIONS 1000 /* 1000 iterations * 10uS = 10mS max */ +#define PHY_WAIT_MICRO_SECONDS 10 /* Buffer offset from buffer pointer */ #define RX_BUF_OFFSET 0x2 @@ -133,88 +111,71 @@ #define ETH_MIB_LATE_COLLISION 0x7c /* Port serial status reg (PSR) */ -#define ETH_INTERFACE_GMII_MII 0 -#define ETH_INTERFACE_PCM BIT0 -#define ETH_LINK_IS_DOWN 0 -#define ETH_LINK_IS_UP BIT1 -#define ETH_PORT_AT_HALF_DUPLEX 0 -#define ETH_PORT_AT_FULL_DUPLEX BIT2 -#define ETH_RX_FLOW_CTRL_DISABLED 0 -#define ETH_RX_FLOW_CTRL_ENBALED BIT3 -#define ETH_GMII_SPEED_100_10 0 -#define ETH_GMII_SPEED_1000 BIT4 -#define ETH_MII_SPEED_10 0 -#define ETH_MII_SPEED_100 BIT5 -#define ETH_NO_TX 0 -#define ETH_TX_IN_PROGRESS BIT7 -#define ETH_BYPASS_NO_ACTIVE 0 -#define ETH_BYPASS_ACTIVE BIT8 -#define ETH_PORT_NOT_AT_PARTITION_STATE 0 -#define ETH_PORT_AT_PARTITION_STATE BIT9 -#define ETH_PORT_TX_FIFO_NOT_EMPTY 0 -#define ETH_PORT_TX_FIFO_EMPTY BIT10 - -#define ETH_DEFAULT_RX_BPDU_QUEUE_3 (BIT23 | BIT22) -#define ETH_DEFAULT_RX_BPDU_QUEUE_4 BIT24 -#define ETH_DEFAULT_RX_BPDU_QUEUE_5 (BIT24 | BIT22) -#define ETH_DEFAULT_RX_BPDU_QUEUE_6 (BIT24 | BIT23) -#define ETH_DEFAULT_RX_BPDU_QUEUE_7 (BIT24 | BIT23 | BIT22) +#define ETH_INTERFACE_PCM 0x00000001 +#define ETH_LINK_IS_UP 0x00000002 +#define ETH_PORT_AT_FULL_DUPLEX 0x00000004 +#define ETH_RX_FLOW_CTRL_ENABLED 0x00000008 +#define ETH_GMII_SPEED_1000 0x00000010 +#define ETH_MII_SPEED_100 0x00000020 +#define ETH_TX_IN_PROGRESS 0x00000080 +#define ETH_BYPASS_ACTIVE 0x00000100 +#define ETH_PORT_AT_PARTITION_STATE 0x00000200 +#define ETH_PORT_TX_FIFO_EMPTY 0x00000400 /* SMI reg */ -#define ETH_SMI_BUSY BIT28 /* 0 - Write, 1 - Read */ -#define ETH_SMI_READ_VALID BIT27 /* 0 - Write, 1 - Read */ -#define ETH_SMI_OPCODE_WRITE 0 /* Completion of Read operation */ -#define ETH_SMI_OPCODE_READ BIT26 /* Operation is in progress */ +#define ETH_SMI_BUSY 0x10000000 /* 0 - Write, 1 - Read */ +#define ETH_SMI_READ_VALID 0x08000000 /* 0 - Write, 1 - Read */ +#define ETH_SMI_OPCODE_WRITE 0 /* Completion of Read */ +#define ETH_SMI_OPCODE_READ 0x04000000 /* Operation is in progress */ + +/* Interrupt Cause Register Bit Definitions */ /* SDMA command status fields macros */ /* Tx & Rx descriptors status */ -#define ETH_ERROR_SUMMARY (BIT0) +#define ETH_ERROR_SUMMARY 0x00000001 /* Tx & Rx descriptors command */ -#define ETH_BUFFER_OWNED_BY_DMA (BIT31) +#define ETH_BUFFER_OWNED_BY_DMA 0x80000000 /* Tx descriptors status */ -#define ETH_LC_ERROR (0 ) -#define ETH_UR_ERROR (BIT1 ) -#define ETH_RL_ERROR (BIT2 ) -#define ETH_LLC_SNAP_FORMAT (BIT9 ) +#define ETH_LC_ERROR 0 +#define ETH_UR_ERROR 0x00000002 +#define ETH_RL_ERROR 0x00000004 +#define ETH_LLC_SNAP_FORMAT 0x00000200 /* Rx descriptors status */ -#define ETH_CRC_ERROR (0 ) -#define ETH_OVERRUN_ERROR (BIT1 ) -#define ETH_MAX_FRAME_LENGTH_ERROR (BIT2 ) -#define ETH_RESOURCE_ERROR ((BIT2 | BIT1)) -#define ETH_VLAN_TAGGED (BIT19) -#define ETH_BPDU_FRAME (BIT20) -#define ETH_TCP_FRAME_OVER_IP_V_4 (0 ) -#define ETH_UDP_FRAME_OVER_IP_V_4 (BIT21) -#define ETH_OTHER_FRAME_TYPE (BIT22) -#define ETH_LAYER_2_IS_ETH_V_2 (BIT23) -#define ETH_FRAME_TYPE_IP_V_4 (BIT24) -#define ETH_FRAME_HEADER_OK (BIT25) -#define ETH_RX_LAST_DESC (BIT26) -#define ETH_RX_FIRST_DESC (BIT27) -#define ETH_UNKNOWN_DESTINATION_ADDR (BIT28) -#define ETH_RX_ENABLE_INTERRUPT (BIT29) -#define ETH_LAYER_4_CHECKSUM_OK (BIT30) +#define ETH_OVERRUN_ERROR 0x00000002 +#define ETH_MAX_FRAME_LENGTH_ERROR 0x00000004 +#define ETH_RESOURCE_ERROR 0x00000006 +#define ETH_VLAN_TAGGED 0x00080000 +#define ETH_BPDU_FRAME 0x00100000 +#define ETH_UDP_FRAME_OVER_IP_V_4 0x00200000 +#define ETH_OTHER_FRAME_TYPE 0x00400000 +#define ETH_LAYER_2_IS_ETH_V_2 0x00800000 +#define ETH_FRAME_TYPE_IP_V_4 0x01000000 +#define ETH_FRAME_HEADER_OK 0x02000000 +#define ETH_RX_LAST_DESC 0x04000000 +#define ETH_RX_FIRST_DESC 0x08000000 +#define ETH_UNKNOWN_DESTINATION_ADDR 0x10000000 +#define ETH_RX_ENABLE_INTERRUPT 0x20000000 +#define ETH_LAYER_4_CHECKSUM_OK 0x40000000 /* Rx descriptors byte count */ -#define ETH_FRAME_FRAGMENTED (BIT2) +#define ETH_FRAME_FRAGMENTED 0x00000004 /* Tx descriptors command */ -#define ETH_LAYER_4_CHECKSUM_FIRST_DESC (BIT10) -#define ETH_FRAME_SET_TO_VLAN (BIT15) -#define ETH_TCP_FRAME (0 ) -#define ETH_UDP_FRAME (BIT16) -#define ETH_GEN_TCP_UDP_CHECKSUM (BIT17) -#define ETH_GEN_IP_V_4_CHECKSUM (BIT18) -#define ETH_ZERO_PADDING (BIT19) -#define ETH_TX_LAST_DESC (BIT20) -#define ETH_TX_FIRST_DESC (BIT21) -#define ETH_GEN_CRC (BIT22) -#define ETH_TX_ENABLE_INTERRUPT (BIT23) -#define ETH_AUTO_MODE (BIT30) +#define ETH_LAYER_4_CHECKSUM_FIRST_DESC 0x00000400 +#define ETH_FRAME_SET_TO_VLAN 0x00008000 +#define ETH_UDP_FRAME 0x00010000 +#define ETH_GEN_TCP_UDP_CHECKSUM 0x00020000 +#define ETH_GEN_IP_V_4_CHECKSUM 0x00040000 +#define ETH_ZERO_PADDING 0x00080000 +#define ETH_TX_LAST_DESC 0x00100000 +#define ETH_TX_FIRST_DESC 0x00200000 +#define ETH_GEN_CRC 0x00400000 +#define ETH_TX_ENABLE_INTERRUPT 0x00800000 +#define ETH_AUTO_MODE 0x40000000 #define ETH_TX_IHL_SHIFT 11 @@ -324,13 +285,6 @@ struct mv643xx_mib_counters { struct mv643xx_private { int port_num; /* User Ethernet port number */ - u8 port_mac_addr[6]; /* User defined port MAC address.*/ - u32 port_config; /* User port configuration value*/ - u32 port_config_extend; /* User port config extend value*/ - u32 port_sdma_config; /* User port SDMA config value */ - u32 port_serial_control; /* User port serial control value */ - u32 port_tx_queue_command; /* Port active Tx queues summary*/ - u32 port_rx_queue_command; /* Port active Rx queues summary*/ u32 rx_sram_addr; /* Base address of rx sram area */ u32 rx_sram_size; /* Size of rx sram area */ @@ -338,7 +292,6 @@ struct mv643xx_private { u32 tx_sram_size; /* Size of tx sram area */ int rx_resource_err; /* Rx ring resource error flag */ - int tx_resource_err; /* Tx ring resource error flag */ /* Tx/Rx rings managment indexes fields. For driver use */ @@ -347,10 +300,6 @@ struct mv643xx_private { /* Next available and first returning Tx resource */ int tx_curr_desc_q, tx_used_desc_q; -#ifdef MV643XX_CHECKSUM_OFFLOAD_TX - int tx_first_desc_q; - u32 tx_first_command; -#endif #ifdef MV643XX_TX_FAST_REFILL u32 tx_clean_threshold; @@ -358,54 +307,43 @@ struct mv643xx_private { struct eth_rx_desc *p_rx_desc_area; dma_addr_t rx_desc_dma; - unsigned int rx_desc_area_size; + int rx_desc_area_size; struct sk_buff **rx_skb; struct eth_tx_desc *p_tx_desc_area; dma_addr_t tx_desc_dma; - unsigned int tx_desc_area_size; + int tx_desc_area_size; struct sk_buff **tx_skb; struct work_struct tx_timeout_task; - /* - * Former struct mv643xx_eth_priv members start here - */ struct net_device_stats stats; struct mv643xx_mib_counters mib_counters; spinlock_t lock; /* Size of Tx Ring per queue */ - unsigned int tx_ring_size; - /* Ammont of SKBs outstanding on Tx queue */ - unsigned int tx_ring_skbs; + int tx_ring_size; + /* Number of tx descriptors in use */ + int tx_desc_count; /* Size of Rx Ring per queue */ - unsigned int rx_ring_size; - /* Ammount of SKBs allocated to Rx Ring per queue */ - unsigned int rx_ring_skbs; - - /* - * rx_task used to fill RX ring out of bottom half context - */ - struct work_struct rx_task; + int rx_ring_size; + /* Number of rx descriptors in use */ + int rx_desc_count; /* * Used in case RX Ring is empty, which can be caused when * system does not have resources (skb's) */ struct timer_list timeout; - long rx_task_busy __attribute__ ((aligned(SMP_CACHE_BYTES))); - unsigned rx_timer_flag; u32 rx_int_coal; u32 tx_int_coal; + struct mii_if_info mii; }; -/* ethernet.h API list */ - /* Port operation control routines */ static void eth_port_init(struct mv643xx_private *mp); static void eth_port_reset(unsigned int eth_port_num); -static void eth_port_start(struct mv643xx_private *mp); +static void eth_port_start(struct net_device *dev); /* Port MAC address routines */ static void eth_port_uc_addr_set(unsigned int eth_port_num, @@ -423,10 +361,6 @@ static void eth_port_read_smi_reg(unsigned int eth_port_num, static void eth_clear_mib_counters(unsigned int eth_port_num); /* Port data flow control routines */ -static ETH_FUNC_RET_STATUS eth_port_send(struct mv643xx_private *mp, - struct pkt_info *p_pkt_info); -static ETH_FUNC_RET_STATUS eth_tx_return_desc(struct mv643xx_private *mp, - struct pkt_info *p_pkt_info); static ETH_FUNC_RET_STATUS eth_port_receive(struct mv643xx_private *mp, struct pkt_info *p_pkt_info); static ETH_FUNC_RET_STATUS eth_rx_return_buff(struct mv643xx_private *mp, diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index 9d6d254..8d49998 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -3,6 +3,7 @@ Written/copyright 1999-2001 by Donald Becker. Portions copyright (c) 2001,2002 Sun Microsystems (thockin@sun.com) Portions copyright 2001,2002 Manfred Spraul (manfred@colorfullife.com) + Portions copyright 2004 Harald Welte <laforge@gnumonks.org> This software may be used and distributed according to the terms of the GNU General Public License (GPL), incorporated herein by reference. @@ -135,8 +136,6 @@ TODO: * big endian support with CFG:BEM instead of cpu_to_le32 - * support for an external PHY - * NAPI */ #include <linux/config.h> @@ -160,6 +159,7 @@ #include <linux/mii.h> #include <linux/crc32.h> #include <linux/bitops.h> +#include <linux/prefetch.h> #include <asm/processor.h> /* Processor type for cache alignment. */ #include <asm/io.h> #include <asm/irq.h> @@ -183,13 +183,11 @@ NETIF_MSG_TX_ERR) static int debug = -1; -/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ -static int max_interrupt_work = 20; static int mtu; /* Maximum number of multicast addresses to filter (vs. rx-all-multicast). This chip uses a 512 element hash table based on the Ethernet CRC. */ -static int multicast_filter_limit = 100; +static const int multicast_filter_limit = 100; /* Set the copy breakpoint for the copy-only-tiny-frames scheme. Setting to > 1518 effectively disables this feature. */ @@ -251,14 +249,11 @@ MODULE_AUTHOR("Donald Becker <becker@scyld.com>"); MODULE_DESCRIPTION("National Semiconductor DP8381x series PCI Ethernet driver"); MODULE_LICENSE("GPL"); -module_param(max_interrupt_work, int, 0); module_param(mtu, int, 0); module_param(debug, int, 0); module_param(rx_copybreak, int, 0); module_param_array(options, int, NULL, 0); module_param_array(full_duplex, int, NULL, 0); -MODULE_PARM_DESC(max_interrupt_work, - "DP8381x maximum events handled per interrupt"); MODULE_PARM_DESC(mtu, "DP8381x MTU (all boards)"); MODULE_PARM_DESC(debug, "DP8381x default debug level"); MODULE_PARM_DESC(rx_copybreak, @@ -374,7 +369,7 @@ enum pcistuff { /* array of board data directly indexed by pci_tbl[x].driver_data */ -static struct { +static const struct { const char *name; unsigned long flags; } natsemi_pci_info[] __devinitdata = { @@ -691,6 +686,8 @@ struct netdev_private { /* Based on MTU+slack. */ unsigned int rx_buf_sz; int oom; + /* Interrupt status */ + u32 intr_status; /* Do not touch the nic registers */ int hands_off; /* external phy that is used: only valid if dev->if_port != PORT_TP */ @@ -748,7 +745,8 @@ static void init_registers(struct net_device *dev); static int start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance, struct pt_regs *regs); static void netdev_error(struct net_device *dev, int intr_status); -static void netdev_rx(struct net_device *dev); +static int natsemi_poll(struct net_device *dev, int *budget); +static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do); static void netdev_tx_done(struct net_device *dev); static int natsemi_change_mtu(struct net_device *dev, int new_mtu); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -776,6 +774,18 @@ static inline void __iomem *ns_ioaddr(struct net_device *dev) return (void __iomem *) dev->base_addr; } +static inline void natsemi_irq_enable(struct net_device *dev) +{ + writel(1, ns_ioaddr(dev) + IntrEnable); + readl(ns_ioaddr(dev) + IntrEnable); +} + +static inline void natsemi_irq_disable(struct net_device *dev) +{ + writel(0, ns_ioaddr(dev) + IntrEnable); + readl(ns_ioaddr(dev) + IntrEnable); +} + static void move_int_phy(struct net_device *dev, int addr) { struct netdev_private *np = netdev_priv(dev); @@ -879,6 +889,7 @@ static int __devinit natsemi_probe1 (struct pci_dev *pdev, spin_lock_init(&np->lock); np->msg_enable = (debug >= 0) ? (1<<debug)-1 : NATSEMI_DEF_MSG; np->hands_off = 0; + np->intr_status = 0; /* Initial port: * - If the nic was configured to use an external phy and if find_mii @@ -932,6 +943,9 @@ static int __devinit natsemi_probe1 (struct pci_dev *pdev, dev->do_ioctl = &netdev_ioctl; dev->tx_timeout = &tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; + dev->poll = natsemi_poll; + dev->weight = 64; + #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = &natsemi_poll_controller; #endif @@ -1484,6 +1498,31 @@ static void natsemi_reset(struct net_device *dev) writel(rfcr, ioaddr + RxFilterAddr); } +static void reset_rx(struct net_device *dev) +{ + int i; + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = ns_ioaddr(dev); + + np->intr_status &= ~RxResetDone; + + writel(RxReset, ioaddr + ChipCmd); + + for (i=0;i<NATSEMI_HW_TIMEOUT;i++) { + np->intr_status |= readl(ioaddr + IntrStatus); + if (np->intr_status & RxResetDone) + break; + udelay(15); + } + if (i==NATSEMI_HW_TIMEOUT) { + printk(KERN_WARNING "%s: RX reset did not complete in %d usec.\n", + dev->name, i*15); + } else if (netif_msg_hw(np)) { + printk(KERN_WARNING "%s: RX reset took %d usec.\n", + dev->name, i*15); + } +} + static void natsemi_reload_eeprom(struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); @@ -2158,68 +2197,92 @@ static void netdev_tx_done(struct net_device *dev) } } -/* The interrupt handler does all of the Rx thread work and cleans up - after the Tx thread. */ +/* The interrupt handler doesn't actually handle interrupts itself, it + * schedules a NAPI poll if there is anything to do. */ static irqreturn_t intr_handler(int irq, void *dev_instance, struct pt_regs *rgs) { struct net_device *dev = dev_instance; struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); - int boguscnt = max_interrupt_work; - unsigned int handled = 0; if (np->hands_off) return IRQ_NONE; - do { - /* Reading automatically acknowledges all int sources. */ - u32 intr_status = readl(ioaddr + IntrStatus); + + /* Reading automatically acknowledges. */ + np->intr_status = readl(ioaddr + IntrStatus); - if (netif_msg_intr(np)) - printk(KERN_DEBUG - "%s: Interrupt, status %#08x, mask %#08x.\n", - dev->name, intr_status, - readl(ioaddr + IntrMask)); + if (netif_msg_intr(np)) + printk(KERN_DEBUG + "%s: Interrupt, status %#08x, mask %#08x.\n", + dev->name, np->intr_status, + readl(ioaddr + IntrMask)); - if (intr_status == 0) - break; - handled = 1; + if (!np->intr_status) + return IRQ_NONE; - if (intr_status & - (IntrRxDone | IntrRxIntr | RxStatusFIFOOver | - IntrRxErr | IntrRxOverrun)) { - netdev_rx(dev); - } + prefetch(&np->rx_skbuff[np->cur_rx % RX_RING_SIZE]); - if (intr_status & - (IntrTxDone | IntrTxIntr | IntrTxIdle | IntrTxErr)) { + if (netif_rx_schedule_prep(dev)) { + /* Disable interrupts and register for poll */ + natsemi_irq_disable(dev); + __netif_rx_schedule(dev); + } + return IRQ_HANDLED; +} + +/* This is the NAPI poll routine. As well as the standard RX handling + * it also handles all other interrupts that the chip might raise. + */ +static int natsemi_poll(struct net_device *dev, int *budget) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem * ioaddr = ns_ioaddr(dev); + + int work_to_do = min(*budget, dev->quota); + int work_done = 0; + + do { + if (np->intr_status & + (IntrTxDone | IntrTxIntr | IntrTxIdle | IntrTxErr)) { spin_lock(&np->lock); netdev_tx_done(dev); spin_unlock(&np->lock); } /* Abnormal error summary/uncommon events handlers. */ - if (intr_status & IntrAbnormalSummary) - netdev_error(dev, intr_status); - - if (--boguscnt < 0) { - if (netif_msg_intr(np)) - printk(KERN_WARNING - "%s: Too much work at interrupt, " - "status=%#08x.\n", - dev->name, intr_status); - break; + if (np->intr_status & IntrAbnormalSummary) + netdev_error(dev, np->intr_status); + + if (np->intr_status & + (IntrRxDone | IntrRxIntr | RxStatusFIFOOver | + IntrRxErr | IntrRxOverrun)) { + netdev_rx(dev, &work_done, work_to_do); } - } while (1); + + *budget -= work_done; + dev->quota -= work_done; - if (netif_msg_intr(np)) - printk(KERN_DEBUG "%s: exiting interrupt.\n", dev->name); + if (work_done >= work_to_do) + return 1; + + np->intr_status = readl(ioaddr + IntrStatus); + } while (np->intr_status); + + netif_rx_complete(dev); - return IRQ_RETVAL(handled); + /* Reenable interrupts providing nothing is trying to shut + * the chip down. */ + spin_lock(&np->lock); + if (!np->hands_off && netif_running(dev)) + natsemi_irq_enable(dev); + spin_unlock(&np->lock); + + return 0; } /* This routine is logically part of the interrupt handler, but separated for clarity and better register allocation. */ -static void netdev_rx(struct net_device *dev) +static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do) { struct netdev_private *np = netdev_priv(dev); int entry = np->cur_rx % RX_RING_SIZE; @@ -2237,6 +2300,12 @@ static void netdev_rx(struct net_device *dev) entry, desc_status); if (--boguscnt < 0) break; + + if (*work_done >= work_to_do) + break; + + (*work_done)++; + pkt_len = (desc_status & DescSizeMask) - 4; if ((desc_status&(DescMore|DescPktOK|DescRxLong)) != DescPktOK){ if (desc_status & DescMore) { @@ -2248,6 +2317,23 @@ static void netdev_rx(struct net_device *dev) "status %#08x.\n", dev->name, np->cur_rx, desc_status); np->stats.rx_length_errors++; + + /* The RX state machine has probably + * locked up beneath us. Follow the + * reset procedure documented in + * AN-1287. */ + + spin_lock_irq(&np->lock); + reset_rx(dev); + reinit_rx(dev); + writel(np->ring_dma, ioaddr + RxRingPtr); + check_link(dev); + spin_unlock_irq(&np->lock); + + /* We'll enable RX on exit from this + * function. */ + break; + } else { /* There was an error. */ np->stats.rx_errors++; @@ -2293,7 +2379,7 @@ static void netdev_rx(struct net_device *dev) np->rx_skbuff[entry] = NULL; } skb->protocol = eth_type_trans(skb, dev); - netif_rx(skb); + netif_receive_skb(skb); dev->last_rx = jiffies; np->stats.rx_packets++; np->stats.rx_bytes += pkt_len; @@ -3074,9 +3160,7 @@ static int netdev_close(struct net_device *dev) del_timer_sync(&np->timer); disable_irq(dev->irq); spin_lock_irq(&np->lock); - /* Disable interrupts, and flush posted writes */ - writel(0, ioaddr + IntrEnable); - readl(ioaddr + IntrEnable); + natsemi_irq_disable(dev); np->hands_off = 1; spin_unlock_irq(&np->lock); enable_irq(dev->irq); @@ -3158,6 +3242,9 @@ static void __devexit natsemi_remove1 (struct pci_dev *pdev) * * netdev_timer: timer stopped by natsemi_suspend. * * intr_handler: doesn't acquire the spinlock. suspend calls * disable_irq() to enforce synchronization. + * * natsemi_poll: checks before reenabling interrupts. suspend + * sets hands_off, disables interrupts and then waits with + * netif_poll_disable(). * * Interrupts must be disabled, otherwise hands_off can cause irq storms. */ @@ -3183,6 +3270,8 @@ static int natsemi_suspend (struct pci_dev *pdev, pm_message_t state) spin_unlock_irq(&np->lock); enable_irq(dev->irq); + netif_poll_disable(dev); + /* Update the error counts. */ __get_stats(dev); @@ -3235,6 +3324,7 @@ static int natsemi_resume (struct pci_dev *pdev) mod_timer(&np->timer, jiffies + 1*HZ); } netif_device_attach(dev); + netif_poll_enable(dev); out: rtnl_unlock(); return 0; diff --git a/drivers/net/ne-h8300.c b/drivers/net/ne-h8300.c index 8f40368..aaebd28 100644 --- a/drivers/net/ne-h8300.c +++ b/drivers/net/ne-h8300.c @@ -27,6 +27,7 @@ static const char version1[] = #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/io.h> @@ -365,7 +366,7 @@ static void ne_reset_8390(struct net_device *dev) /* This check _should_not_ be necessary, omit eventually. */ while ((inb_p(NE_BASE+EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk(KERN_WARNING "%s: ne_reset_8390() did not complete.\n", dev->name); break; } @@ -580,7 +581,7 @@ retry: #endif while ((inb_p(NE_BASE + EN0_ISR) & ENISR_RDC) == 0) - if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ + if (time_after(jiffies, dma_start + 2*HZ/100)) { /* 20ms */ printk(KERN_WARNING "%s: timeout waiting for Tx RDC.\n", dev->name); ne_reset_8390(dev); NS8390_init(dev,1); diff --git a/drivers/net/ne.c b/drivers/net/ne.c index 94f782d..08b218c 100644 --- a/drivers/net/ne.c +++ b/drivers/net/ne.c @@ -50,6 +50,7 @@ static const char version2[] = #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/io.h> @@ -341,7 +342,7 @@ static int __init ne_probe1(struct net_device *dev, int ioaddr) outb(inb(ioaddr + NE_RESET), ioaddr + NE_RESET); while ((inb_p(ioaddr + EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { if (bad_card) { printk(" (warning: no reset ack)"); break; @@ -580,7 +581,7 @@ static void ne_reset_8390(struct net_device *dev) /* This check _should_not_ be necessary, omit eventually. */ while ((inb_p(NE_BASE+EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk(KERN_WARNING "%s: ne_reset_8390() did not complete.\n", dev->name); break; } @@ -787,7 +788,7 @@ retry: #endif while ((inb_p(nic_base + EN0_ISR) & ENISR_RDC) == 0) - if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ + if (time_after(jiffies, dma_start + 2*HZ/100)) { /* 20ms */ printk(KERN_WARNING "%s: timeout waiting for Tx RDC.\n", dev->name); ne_reset_8390(dev); NS8390_init(dev,1); diff --git a/drivers/net/ne2.c b/drivers/net/ne2.c index e6df375..2aa7b77 100644 --- a/drivers/net/ne2.c +++ b/drivers/net/ne2.c @@ -75,6 +75,7 @@ static const char *version = "ne2.c:v0.91 Nov 16 1998 Wim Dumon <wimpie@kotnet.o #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/io.h> @@ -395,7 +396,7 @@ static int __init ne2_probe1(struct net_device *dev, int slot) outb(inb(base_addr + NE_RESET), base_addr + NE_RESET); while ((inb_p(base_addr + EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk(" not found (no reset ack).\n"); retval = -ENODEV; goto out; @@ -548,7 +549,7 @@ static void ne_reset_8390(struct net_device *dev) /* This check _should_not_ be necessary, omit eventually. */ while ((inb_p(NE_BASE+EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk("%s: ne_reset_8390() did not complete.\n", dev->name); break; @@ -749,7 +750,7 @@ retry: #endif while ((inb_p(nic_base + EN0_ISR) & ENISR_RDC) == 0) - if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ + if (time_after(jiffies, dma_start + 2*HZ/100)) { /* 20ms */ printk("%s: timeout waiting for Tx RDC.\n", dev->name); ne_reset_8390(dev); NS8390_init(dev,1); diff --git a/drivers/net/ne2k-pci.c b/drivers/net/ne2k-pci.c index d11821d..e3ebb58 100644 --- a/drivers/net/ne2k-pci.c +++ b/drivers/net/ne2k-pci.c @@ -117,7 +117,7 @@ enum ne2k_pci_chipsets { }; -static struct { +static const struct { char *name; int flags; } pci_clone_list[] __devinitdata = { diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index b0c3b6a..0fede50 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -116,6 +116,7 @@ #include <linux/timer.h> #include <linux/if_vlan.h> #include <linux/rtnetlink.h> +#include <linux/jiffies.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -651,7 +652,7 @@ static void FASTCALL(phy_intr(struct net_device *ndev)); static void fastcall phy_intr(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); - static char *speeds[] = { "10", "100", "1000", "1000(?)", "1000F" }; + static const char *speeds[] = { "10", "100", "1000", "1000(?)", "1000F" }; u32 cfg, new_cfg; u32 tbisr, tanar, tanlpar; int speed, fullduplex, newlinkstate; @@ -1607,7 +1608,7 @@ static void ns83820_run_bist(struct net_device *ndev, const char *name, u32 enab { struct ns83820 *dev = PRIV(ndev); int timed_out = 0; - long start; + unsigned long start; u32 status; int loops = 0; @@ -1625,7 +1626,7 @@ static void ns83820_run_bist(struct net_device *ndev, const char *name, u32 enab break; if (status & fail) break; - if ((jiffies - start) >= HZ) { + if (time_after_eq(jiffies, start + HZ)) { timed_out = 1; break; } diff --git a/drivers/net/oaknet.c b/drivers/net/oaknet.c index 62167a2..d0f686d 100644 --- a/drivers/net/oaknet.c +++ b/drivers/net/oaknet.c @@ -20,6 +20,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/init.h> +#include <linux/jiffies.h> #include <asm/board.h> #include <asm/io.h> @@ -606,7 +607,7 @@ retry: #endif while ((ei_ibp(base + EN0_ISR) & ENISR_RDC) == 0) { - if (jiffies - start > OAKNET_WAIT) { + if (time_after(jiffies, start + OAKNET_WAIT)) { printk("%s: timeout waiting for Tx RDC.\n", dev->name); oaknet_reset_8390(dev); NS8390_init(dev, TRUE); diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 48774ef..ce90bec 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -341,7 +341,7 @@ static void tc574_detach(struct pcmcia_device *p_dev) #define CS_CHECK(fn, ret) \ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) -static char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; +static const char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; static void tc574_config(dev_link_t *link) { diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 1c3c9c6..3dba508 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -39,6 +39,7 @@ #include <linux/if_arp.h> #include <linux/ioport.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <pcmcia/cs_types.h> #include <pcmcia/cs.h> @@ -115,7 +116,7 @@ struct el3_private { spinlock_t lock; }; -static char *if_names[] = { "auto", "10baseT", "10base2", "AUI" }; +static const char *if_names[] = { "auto", "10baseT", "10base2", "AUI" }; /*====================================================================*/ @@ -796,7 +797,7 @@ static void media_check(unsigned long arg) media = inw(ioaddr+WN4_MEDIA) & 0xc810; /* Ignore collisions unless we've had no irq's recently */ - if (jiffies - lp->last_irq < HZ) { + if (time_before(jiffies, lp->last_irq + HZ)) { media &= ~0x0010; } else { /* Try harder to detect carrier errors */ diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 28fe2fb..b7ac14b 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -309,7 +309,7 @@ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) static int mfc_try_io_port(dev_link_t *link) { int i, ret; - static kio_addr_t serial_base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; + static const kio_addr_t serial_base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 }; for (i = 0; i < 5; i++) { link->io.BasePort2 = serial_base[i]; diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 4a23225..787176c 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -388,7 +388,7 @@ static char *version = DRV_NAME " " DRV_VERSION " (Roger C. Pao)"; #endif -static char *if_names[]={ +static const char *if_names[]={ "Auto", "10baseT", "BNC", }; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index d85b758..b46e5f7 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -66,7 +66,7 @@ #define PCNET_RDC_TIMEOUT (2*HZ/100) /* Max wait in jiffies for Tx RDC */ -static char *if_names[] = { "auto", "10baseT", "10base2"}; +static const char *if_names[] = { "auto", "10baseT", "10base2"}; #ifdef PCMCIA_DEBUG static int pc_debug = PCMCIA_DEBUG; @@ -1727,6 +1727,7 @@ static struct pcmcia_device_id pcnet_ids[] = { PCMCIA_DEVICE_PROD_ID12("Linksys", "EtherFast 10/100 PC Card (PCMPC100 V2)", 0x0733cc81, 0x3a3b28e9), PCMCIA_DEVICE_PROD_ID12("Linksys", "HomeLink Phoneline + 10/100 Network PC Card (PCM100H1)", 0x733cc81, 0x7a3e5c3a), PCMCIA_DEVICE_PROD_ID12("Logitec", "LPM-LN100TX", 0x88fcdeda, 0x6d772737), + PCMCIA_DEVICE_PROD_ID12("Logitec", "LPM-LN100TE", 0x88fcdeda, 0x0e714bee), PCMCIA_DEVICE_PROD_ID12("Logitec", "LPM-LN20T", 0x88fcdeda, 0x81090922), PCMCIA_DEVICE_PROD_ID12("LONGSHINE", "PCMCIA Ethernet Card", 0xf866b0b0, 0x6f6652e0), PCMCIA_DEVICE_PROD_ID12("MACNICA", "ME1-JEIDA", 0x20841b68, 0xaf8a3578), diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 0122415..8839c4f 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -59,7 +59,7 @@ /*====================================================================*/ -static char *if_names[] = { "auto", "10baseT", "10base2"}; +static const char *if_names[] = { "auto", "10baseT", "10base2"}; /* Module parameters */ @@ -777,7 +777,7 @@ free_cfg_mem: static int osi_config(dev_link_t *link) { struct net_device *dev = link->priv; - static kio_addr_t com[4] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 }; + static const kio_addr_t com[4] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 }; int i, j; link->conf.Attributes |= CONF_ENABLE_SPKR; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 593d8ad..eed4968 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -208,7 +208,7 @@ enum xirc_cmd { /* Commands */ #define XIRCREG45_REV 15 /* Revision Register (rd) */ #define XIRCREG50_IA 8 /* Individual Address (8-13) */ -static char *if_names[] = { "Auto", "10BaseT", "10Base2", "AUI", "100BaseT" }; +static const char *if_names[] = { "Auto", "10BaseT", "10Base2", "AUI", "100BaseT" }; /**************** * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 8f6cf8c..7e90057 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -26,7 +26,7 @@ #define DRV_RELDATE "01.Nov.2005" #define PFX DRV_NAME ": " -static const char *version = +static const char * const version = DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n"; #include <linux/module.h> @@ -109,7 +109,7 @@ static int rx_copybreak = 200; * table to translate option values from tulip * to internal options */ -static unsigned char options_mapping[] = { +static const unsigned char options_mapping[] = { PCNET32_PORT_ASEL, /* 0 Auto-select */ PCNET32_PORT_AUI, /* 1 BNC/AUI */ PCNET32_PORT_AUI, /* 2 AUI/BNC */ @@ -733,7 +733,7 @@ static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1) int rc; /* return code */ int size; /* size of packets */ unsigned char *packet; /* source packet data */ - static int data_len = 60; /* length of source packets */ + static const int data_len = 60; /* length of source packets */ unsigned long flags; unsigned long ticks; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1474b7c..33cec2d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -132,7 +132,7 @@ struct phy_setting { }; /* A mapping of all SUPPORTED settings to speed/duplex */ -static struct phy_setting settings[] = { +static const struct phy_setting settings[] = { { .speed = 10000, .duplex = DUPLEX_FULL, diff --git a/drivers/net/plip.c b/drivers/net/plip.c index 87ee327..d4449d6 100644 --- a/drivers/net/plip.c +++ b/drivers/net/plip.c @@ -123,7 +123,7 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n" #ifndef NET_DEBUG #define NET_DEBUG 1 #endif -static unsigned int net_debug = NET_DEBUG; +static const unsigned int net_debug = NET_DEBUG; #define ENABLE(irq) if (irq != -1) enable_irq(irq) #define DISABLE(irq) if (irq != -1) disable_irq(irq) @@ -351,7 +351,7 @@ static int plip_bh_timeout_error(struct net_device *dev, struct net_local *nl, typedef int (*plip_func)(struct net_device *dev, struct net_local *nl, struct plip_local *snd, struct plip_local *rcv); -static plip_func connection_state_table[] = +static const plip_func connection_state_table[] = { plip_none, plip_receive_packet, diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index aa6540b..23659fd 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -30,6 +30,7 @@ #include <linux/ppp_channel.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/jiffies.h> #include <asm/uaccess.h> #include <asm/string.h> @@ -570,7 +571,7 @@ ppp_async_encode(struct asyncppp *ap) * character if necessary. */ if (islcp || flag_time == 0 - || jiffies - ap->last_xmit >= flag_time) + || time_after_eq(jiffies, ap->last_xmit + flag_time)) *buf++ = PPP_FLAG; ap->last_xmit = jiffies; fcs = PPP_INITFCS; diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 33cb825..33255fe 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -108,7 +108,7 @@ static void ppp_print_hex (register __u8 * out, const __u8 * in, int count) { register __u8 next_ch; - static char hex[] = "0123456789ABCDEF"; + static const char hex[] = "0123456789ABCDEF"; while (count-- > 0) { next_ch = *in++; diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 8cc0d0b..0ad3310 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -113,11 +113,11 @@ static int media[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; static int num_media = 0; /* Maximum events (Rx packets, etc.) to handle at each interrupt. */ -static int max_interrupt_work = 20; +static const int max_interrupt_work = 20; /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). The RTL chips use a 64 element hash table based on the Ethernet CRC. */ -static int multicast_filter_limit = 32; +static const int multicast_filter_limit = 32; /* MAC address length */ #define MAC_ADDR_LEN 6 diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index b7f00d6..79208f4 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -57,23 +57,27 @@ #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> +#include <asm/div64.h> /* local include */ #include "s2io.h" #include "s2io-regs.h" -#define DRV_VERSION "Version 2.0.9.4" +#define DRV_VERSION "2.0.11.2" /* S2io Driver name & version. */ static char s2io_driver_name[] = "Neterion"; static char s2io_driver_version[] = DRV_VERSION; -int rxd_size[4] = {32,48,48,64}; -int rxd_count[4] = {127,85,85,63}; +static int rxd_size[4] = {32,48,48,64}; +static int rxd_count[4] = {127,85,85,63}; static inline int RXD_IS_UP2DT(RxD_t *rxdp) { @@ -168,6 +172,11 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = { {"\n DRIVER STATISTICS"}, {"single_bit_ecc_errs"}, {"double_bit_ecc_errs"}, + ("lro_aggregated_pkts"), + ("lro_flush_both_count"), + ("lro_out_of_sequence_pkts"), + ("lro_flush_due_to_max_pkts"), + ("lro_avg_aggr_pkts"), }; #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN @@ -214,7 +223,7 @@ static void s2io_vlan_rx_kill_vid(struct net_device *dev, unsigned long vid) #define SWITCH_SIGN 0xA5A5A5A5A5A5A5A5ULL #define END_SIGN 0x0 -static u64 herc_act_dtx_cfg[] = { +static const u64 herc_act_dtx_cfg[] = { /* Set address */ 0x8000051536750000ULL, 0x80000515367500E0ULL, /* Write data */ @@ -235,7 +244,7 @@ static u64 herc_act_dtx_cfg[] = { END_SIGN }; -static u64 xena_mdio_cfg[] = { +static const u64 xena_mdio_cfg[] = { /* Reset PMA PLL */ 0xC001010000000000ULL, 0xC0010100000000E0ULL, 0xC0010100008000E4ULL, @@ -245,7 +254,7 @@ static u64 xena_mdio_cfg[] = { END_SIGN }; -static u64 xena_dtx_cfg[] = { +static const u64 xena_dtx_cfg[] = { 0x8000051500000000ULL, 0x80000515000000E0ULL, 0x80000515D93500E4ULL, 0x8001051500000000ULL, 0x80010515000000E0ULL, 0x80010515001E00E4ULL, @@ -273,7 +282,7 @@ static u64 xena_dtx_cfg[] = { * Constants for Fixing the MacAddress problem seen mostly on * Alpha machines. */ -static u64 fix_mac[] = { +static const u64 fix_mac[] = { 0x0060000000000000ULL, 0x0060600000000000ULL, 0x0040600000000000ULL, 0x0000600000000000ULL, 0x0020600000000000ULL, 0x0060600000000000ULL, @@ -317,6 +326,12 @@ static unsigned int indicate_max_pkts; static unsigned int rxsync_frequency = 3; /* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */ static unsigned int intr_type = 0; +/* Large receive offload feature */ +static unsigned int lro = 0; +/* Max pkts to be aggregated by LRO at one time. If not specified, + * aggregation happens until we hit max IP pkt size(64K) + */ +static unsigned int lro_max_pkts = 0xFFFF; /* * S2IO device table. @@ -1476,6 +1491,19 @@ static int init_nic(struct s2io_nic *nic) writel((u32) (val64 >> 32), (add + 4)); val64 = readq(&bar0->mac_cfg); + /* Enable FCS stripping by adapter */ + add = &bar0->mac_cfg; + val64 = readq(&bar0->mac_cfg); + val64 |= MAC_CFG_RMAC_STRIP_FCS; + if (nic->device_type == XFRAME_II_DEVICE) + writeq(val64, &bar0->mac_cfg); + else { + writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key); + writel((u32) (val64), add); + writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key); + writel((u32) (val64 >> 32), (add + 4)); + } + /* * Set the time value to be inserted in the pause frame * generated by xena. @@ -2127,7 +2155,7 @@ static void stop_nic(struct s2io_nic *nic) } } -int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb) +static int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb) { struct net_device *dev = nic->dev; struct sk_buff *frag_list; @@ -2569,6 +2597,8 @@ static void rx_intr_handler(ring_info_t *ring_data) #ifndef CONFIG_S2IO_NAPI int pkt_cnt = 0; #endif + int i; + spin_lock(&nic->rx_lock); if (atomic_read(&nic->card_state) == CARD_DOWN) { DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n", @@ -2661,6 +2691,18 @@ static void rx_intr_handler(ring_info_t *ring_data) break; #endif } + if (nic->lro) { + /* Clear all LRO sessions before exiting */ + for (i=0; i<MAX_LRO_SESSIONS; i++) { + lro_t *lro = &nic->lro0_n[i]; + if (lro->in_use) { + update_L3L4_header(nic, lro); + queue_rx_frame(lro->parent); + clear_lro_session(lro); + } + } + } + spin_unlock(&nic->rx_lock); } @@ -2852,7 +2894,7 @@ static int wait_for_cmd_complete(nic_t * sp) * void. */ -void s2io_reset(nic_t * sp) +static void s2io_reset(nic_t * sp) { XENA_dev_config_t __iomem *bar0 = sp->bar0; u64 val64; @@ -2940,7 +2982,7 @@ void s2io_reset(nic_t * sp) * SUCCESS on success and FAILURE on failure. */ -int s2io_set_swapper(nic_t * sp) +static int s2io_set_swapper(nic_t * sp) { struct net_device *dev = sp->dev; XENA_dev_config_t __iomem *bar0 = sp->bar0; @@ -3089,7 +3131,7 @@ static int wait_for_msix_trans(nic_t *nic, int i) return ret; } -void restore_xmsi_data(nic_t *nic) +static void restore_xmsi_data(nic_t *nic) { XENA_dev_config_t __iomem *bar0 = nic->bar0; u64 val64; @@ -3180,7 +3222,7 @@ int s2io_enable_msi(nic_t *nic) return 0; } -int s2io_enable_msi_x(nic_t *nic) +static int s2io_enable_msi_x(nic_t *nic) { XENA_dev_config_t __iomem *bar0 = nic->bar0; u64 tx_mat, rx_mat; @@ -3668,23 +3710,32 @@ s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs) * else schedule a tasklet to reallocate the buffers. */ for (i = 0; i < config->rx_ring_num; i++) { - int rxb_size = atomic_read(&sp->rx_bufs_left[i]); - int level = rx_buffer_level(sp, rxb_size, i); - - if ((level == PANIC) && (!TASKLET_IN_USE)) { - DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name); - DBG_PRINT(INTR_DBG, "PANIC levels\n"); - if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { - DBG_PRINT(ERR_DBG, "%s:Out of memory", - dev->name); - DBG_PRINT(ERR_DBG, " in ISR!!\n"); + if (!sp->lro) { + int rxb_size = atomic_read(&sp->rx_bufs_left[i]); + int level = rx_buffer_level(sp, rxb_size, i); + + if ((level == PANIC) && (!TASKLET_IN_USE)) { + DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", + dev->name); + DBG_PRINT(INTR_DBG, "PANIC levels\n"); + if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in ISR!!\n"); + clear_bit(0, (&sp->tasklet_status)); + atomic_dec(&sp->isr_cnt); + return IRQ_HANDLED; + } clear_bit(0, (&sp->tasklet_status)); - atomic_dec(&sp->isr_cnt); - return IRQ_HANDLED; + } else if (level == LOW) { + tasklet_schedule(&sp->task); } - clear_bit(0, (&sp->tasklet_status)); - } else if (level == LOW) { - tasklet_schedule(&sp->task); + } + else if (fill_rx_buffers(sp, i) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in Rx Intr!!\n"); + break; } } @@ -3697,29 +3748,37 @@ s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs) { ring_info_t *ring = (ring_info_t *)dev_id; nic_t *sp = ring->nic; + struct net_device *dev = (struct net_device *) dev_id; int rxb_size, level, rng_n; atomic_inc(&sp->isr_cnt); rx_intr_handler(ring); rng_n = ring->ring_no; - rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]); - level = rx_buffer_level(sp, rxb_size, rng_n); - - if ((level == PANIC) && (!TASKLET_IN_USE)) { - int ret; - DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__); - DBG_PRINT(INTR_DBG, "PANIC levels\n"); - if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) { - DBG_PRINT(ERR_DBG, "Out of memory in %s", - __FUNCTION__); + if (!sp->lro) { + rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]); + level = rx_buffer_level(sp, rxb_size, rng_n); + + if ((level == PANIC) && (!TASKLET_IN_USE)) { + int ret; + DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__); + DBG_PRINT(INTR_DBG, "PANIC levels\n"); + if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "Out of memory in %s", + __FUNCTION__); + clear_bit(0, (&sp->tasklet_status)); + return IRQ_HANDLED; + } clear_bit(0, (&sp->tasklet_status)); - return IRQ_HANDLED; + } else if (level == LOW) { + tasklet_schedule(&sp->task); } - clear_bit(0, (&sp->tasklet_status)); - } else if (level == LOW) { - tasklet_schedule(&sp->task); } + else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name); + DBG_PRINT(ERR_DBG, " in Rx Intr!!\n"); + } + atomic_dec(&sp->isr_cnt); return IRQ_HANDLED; @@ -3875,24 +3934,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs) */ #ifndef CONFIG_S2IO_NAPI for (i = 0; i < config->rx_ring_num; i++) { - int ret; - int rxb_size = atomic_read(&sp->rx_bufs_left[i]); - int level = rx_buffer_level(sp, rxb_size, i); - - if ((level == PANIC) && (!TASKLET_IN_USE)) { - DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name); - DBG_PRINT(INTR_DBG, "PANIC levels\n"); - if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { - DBG_PRINT(ERR_DBG, "%s:Out of memory", - dev->name); - DBG_PRINT(ERR_DBG, " in ISR!!\n"); + if (!sp->lro) { + int ret; + int rxb_size = atomic_read(&sp->rx_bufs_left[i]); + int level = rx_buffer_level(sp, rxb_size, i); + + if ((level == PANIC) && (!TASKLET_IN_USE)) { + DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", + dev->name); + DBG_PRINT(INTR_DBG, "PANIC levels\n"); + if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in ISR!!\n"); + clear_bit(0, (&sp->tasklet_status)); + atomic_dec(&sp->isr_cnt); + return IRQ_HANDLED; + } clear_bit(0, (&sp->tasklet_status)); - atomic_dec(&sp->isr_cnt); - return IRQ_HANDLED; + } else if (level == LOW) { + tasklet_schedule(&sp->task); } - clear_bit(0, (&sp->tasklet_status)); - } else if (level == LOW) { - tasklet_schedule(&sp->task); + } + else if (fill_rx_buffers(sp, i) == -ENOMEM) { + DBG_PRINT(ERR_DBG, "%s:Out of memory", + dev->name); + DBG_PRINT(ERR_DBG, " in Rx intr!!\n"); + break; } } #endif @@ -4129,7 +4197,7 @@ static void s2io_set_multicast(struct net_device *dev) * as defined in errno.h file on failure. */ -int s2io_set_mac_addr(struct net_device *dev, u8 * addr) +static int s2io_set_mac_addr(struct net_device *dev, u8 * addr) { nic_t *sp = dev->priv; XENA_dev_config_t __iomem *bar0 = sp->bar0; @@ -5044,6 +5112,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev, int i = 0; nic_t *sp = dev->priv; StatInfo_t *stat_info = sp->mac_control.stats_info; + u64 tmp; s2io_updt_stats(sp); tmp_stats[i++] = @@ -5135,6 +5204,16 @@ static void s2io_get_ethtool_stats(struct net_device *dev, tmp_stats[i++] = 0; tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs; tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs; + tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt; + tmp_stats[i++] = stat_info->sw_stat.sending_both; + tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts; + tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts; + tmp = 0; + if (stat_info->sw_stat.num_aggregations) { + tmp = stat_info->sw_stat.sum_avg_pkts_aggregated; + do_div(tmp, stat_info->sw_stat.num_aggregations); + } + tmp_stats[i++] = tmp; } static int s2io_ethtool_get_regs_len(struct net_device *dev) @@ -5516,6 +5595,14 @@ static int s2io_card_up(nic_t * sp) /* Setting its receive mode */ s2io_set_multicast(dev); + if (sp->lro) { + /* Initialize max aggregatable pkts based on MTU */ + sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu; + /* Check if we can use(if specified) user provided value */ + if (lro_max_pkts < sp->lro_max_aggr_per_sess) + sp->lro_max_aggr_per_sess = lro_max_pkts; + } + /* Enable tasklet for the device */ tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev); @@ -5608,6 +5695,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) ((unsigned long) rxdp->Host_Control); int ring_no = ring_data->ring_no; u16 l3_csum, l4_csum; + lro_t *lro; skb->dev = dev; if (rxdp->Control_1 & RXD_T_CODE) { @@ -5656,7 +5744,8 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) skb_put(skb, buf2_len); } - if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && + if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) || + (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) && (sp->rx_csum)) { l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); @@ -5667,6 +5756,54 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) * a flag in the RxD. */ skb->ip_summed = CHECKSUM_UNNECESSARY; + if (sp->lro) { + u32 tcp_len; + u8 *tcp; + int ret = 0; + + ret = s2io_club_tcp_session(skb->data, &tcp, + &tcp_len, &lro, rxdp, sp); + switch (ret) { + case 3: /* Begin anew */ + lro->parent = skb; + goto aggregate; + case 1: /* Aggregate */ + { + lro_append_pkt(sp, lro, + skb, tcp_len); + goto aggregate; + } + case 4: /* Flush session */ + { + lro_append_pkt(sp, lro, + skb, tcp_len); + queue_rx_frame(lro->parent); + clear_lro_session(lro); + sp->mac_control.stats_info-> + sw_stat.flush_max_pkts++; + goto aggregate; + } + case 2: /* Flush both */ + lro->parent->data_len = + lro->frags_len; + sp->mac_control.stats_info-> + sw_stat.sending_both++; + queue_rx_frame(lro->parent); + clear_lro_session(lro); + goto send_up; + case 0: /* sessions exceeded */ + case 5: /* + * First pkt in session not + * L3/L4 aggregatable + */ + break; + default: + DBG_PRINT(ERR_DBG, + "%s: Samadhana!!\n", + __FUNCTION__); + BUG(); + } + } } else { /* * Packet with erroneous checksum, let the @@ -5678,25 +5815,31 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) skb->ip_summed = CHECKSUM_NONE; } - skb->protocol = eth_type_trans(skb, dev); + if (!sp->lro) { + skb->protocol = eth_type_trans(skb, dev); #ifdef CONFIG_S2IO_NAPI - if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { - /* Queueing the vlan frame to the upper layer */ - vlan_hwaccel_receive_skb(skb, sp->vlgrp, - RXD_GET_VLAN_TAG(rxdp->Control_2)); - } else { - netif_receive_skb(skb); - } + if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { + /* Queueing the vlan frame to the upper layer */ + vlan_hwaccel_receive_skb(skb, sp->vlgrp, + RXD_GET_VLAN_TAG(rxdp->Control_2)); + } else { + netif_receive_skb(skb); + } #else - if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { - /* Queueing the vlan frame to the upper layer */ - vlan_hwaccel_rx(skb, sp->vlgrp, - RXD_GET_VLAN_TAG(rxdp->Control_2)); - } else { - netif_rx(skb); - } + if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) { + /* Queueing the vlan frame to the upper layer */ + vlan_hwaccel_rx(skb, sp->vlgrp, + RXD_GET_VLAN_TAG(rxdp->Control_2)); + } else { + netif_rx(skb); + } #endif + } else { +send_up: + queue_rx_frame(skb); + } dev->last_rx = jiffies; +aggregate: atomic_dec(&sp->rx_bufs_left[ring_no]); return SUCCESS; } @@ -5714,7 +5857,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp) * void. */ -void s2io_link(nic_t * sp, int link) +static void s2io_link(nic_t * sp, int link) { struct net_device *dev = (struct net_device *) sp->dev; @@ -5739,7 +5882,7 @@ void s2io_link(nic_t * sp, int link) * returns the revision ID of the device. */ -int get_xena_rev_id(struct pci_dev *pdev) +static int get_xena_rev_id(struct pci_dev *pdev) { u8 id = 0; int ret; @@ -5808,6 +5951,8 @@ module_param(indicate_max_pkts, int, 0); #endif module_param(rxsync_frequency, int, 0); module_param(intr_type, int, 0); +module_param(lro, int, 0); +module_param(lro_max_pkts, int, 0); /** * s2io_init_nic - Initialization of the adapter . @@ -5939,6 +6084,7 @@ Defaulting to INTA\n"); else sp->device_type = XFRAME_I_DEVICE; + sp->lro = lro; /* Initialize some PCI/PCI-X fields of the NIC. */ s2io_init_pci(sp); @@ -6242,6 +6388,10 @@ Defaulting to INTA\n"); DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been " "enabled\n",dev->name); + if (sp->lro) + DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n", + dev->name); + /* Initialize device name */ strcpy(sp->name, dev->name); if (sp->device_type & XFRAME_II_DEVICE) @@ -6344,7 +6494,7 @@ int __init s2io_starter(void) * Description: This function is the cleanup routine for the driver. It unregist * ers the driver. */ -void s2io_closer(void) +static void s2io_closer(void) { pci_unregister_driver(&s2io_driver); DBG_PRINT(INIT_DBG, "cleanup done\n"); @@ -6352,3 +6502,318 @@ void s2io_closer(void) module_init(s2io_starter); module_exit(s2io_closer); + +static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip, + struct tcphdr **tcp, RxD_t *rxdp) +{ + int ip_off; + u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len; + + if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) { + DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n", + __FUNCTION__); + return -1; + } + + /* TODO: + * By default the VLAN field in the MAC is stripped by the card, if this + * feature is turned off in rx_pa_cfg register, then the ip_off field + * has to be shifted by a further 2 bytes + */ + switch (l2_type) { + case 0: /* DIX type */ + case 4: /* DIX type with VLAN */ + ip_off = HEADER_ETHERNET_II_802_3_SIZE; + break; + /* LLC, SNAP etc are considered non-mergeable */ + default: + return -1; + } + + *ip = (struct iphdr *)((u8 *)buffer + ip_off); + ip_len = (u8)((*ip)->ihl); + ip_len <<= 2; + *tcp = (struct tcphdr *)((unsigned long)*ip + ip_len); + + return 0; +} + +static int check_for_socket_match(lro_t *lro, struct iphdr *ip, + struct tcphdr *tcp) +{ + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + if ((lro->iph->saddr != ip->saddr) || (lro->iph->daddr != ip->daddr) || + (lro->tcph->source != tcp->source) || (lro->tcph->dest != tcp->dest)) + return -1; + return 0; +} + +static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp) +{ + return(ntohs(ip->tot_len) - (ip->ihl << 2) - (tcp->doff << 2)); +} + +static void initiate_new_session(lro_t *lro, u8 *l2h, + struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len) +{ + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + lro->l2h = l2h; + lro->iph = ip; + lro->tcph = tcp; + lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq); + lro->tcp_ack = ntohl(tcp->ack_seq); + lro->sg_num = 1; + lro->total_len = ntohs(ip->tot_len); + lro->frags_len = 0; + /* + * check if we saw TCP timestamp. Other consistency checks have + * already been done. + */ + if (tcp->doff == 8) { + u32 *ptr; + ptr = (u32 *)(tcp+1); + lro->saw_ts = 1; + lro->cur_tsval = *(ptr+1); + lro->cur_tsecr = *(ptr+2); + } + lro->in_use = 1; +} + +static void update_L3L4_header(nic_t *sp, lro_t *lro) +{ + struct iphdr *ip = lro->iph; + struct tcphdr *tcp = lro->tcph; + u16 nchk; + StatInfo_t *statinfo = sp->mac_control.stats_info; + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + + /* Update L3 header */ + ip->tot_len = htons(lro->total_len); + ip->check = 0; + nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl); + ip->check = nchk; + + /* Update L4 header */ + tcp->ack_seq = lro->tcp_ack; + tcp->window = lro->window; + + /* Update tsecr field if this session has timestamps enabled */ + if (lro->saw_ts) { + u32 *ptr = (u32 *)(tcp + 1); + *(ptr+2) = lro->cur_tsecr; + } + + /* Update counters required for calculation of + * average no. of packets aggregated. + */ + statinfo->sw_stat.sum_avg_pkts_aggregated += lro->sg_num; + statinfo->sw_stat.num_aggregations++; +} + +static void aggregate_new_rx(lro_t *lro, struct iphdr *ip, + struct tcphdr *tcp, u32 l4_pyld) +{ + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + lro->total_len += l4_pyld; + lro->frags_len += l4_pyld; + lro->tcp_next_seq += l4_pyld; + lro->sg_num++; + + /* Update ack seq no. and window ad(from this pkt) in LRO object */ + lro->tcp_ack = tcp->ack_seq; + lro->window = tcp->window; + + if (lro->saw_ts) { + u32 *ptr; + /* Update tsecr and tsval from this packet */ + ptr = (u32 *) (tcp + 1); + lro->cur_tsval = *(ptr + 1); + lro->cur_tsecr = *(ptr + 2); + } +} + +static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip, + struct tcphdr *tcp, u32 tcp_pyld_len) +{ + u8 *ptr; + + DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__); + + if (!tcp_pyld_len) { + /* Runt frame or a pure ack */ + return -1; + } + + if (ip->ihl != 5) /* IP has options */ + return -1; + + if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin || + !tcp->ack) { + /* + * Currently recognize only the ack control word and + * any other control field being set would result in + * flushing the LRO session + */ + return -1; + } + + /* + * Allow only one TCP timestamp option. Don't aggregate if + * any other options are detected. + */ + if (tcp->doff != 5 && tcp->doff != 8) + return -1; + + if (tcp->doff == 8) { + ptr = (u8 *)(tcp + 1); + while (*ptr == TCPOPT_NOP) + ptr++; + if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP) + return -1; + + /* Ensure timestamp value increases monotonically */ + if (l_lro) + if (l_lro->cur_tsval > *((u32 *)(ptr+2))) + return -1; + + /* timestamp echo reply should be non-zero */ + if (*((u32 *)(ptr+6)) == 0) + return -1; + } + + return 0; +} + +static int +s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, + RxD_t *rxdp, nic_t *sp) +{ + struct iphdr *ip; + struct tcphdr *tcph; + int ret = 0, i; + + if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp, + rxdp))) { + DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n", + ip->saddr, ip->daddr); + } else { + return ret; + } + + tcph = (struct tcphdr *)*tcp; + *tcp_len = get_l4_pyld_length(ip, tcph); + for (i=0; i<MAX_LRO_SESSIONS; i++) { + lro_t *l_lro = &sp->lro0_n[i]; + if (l_lro->in_use) { + if (check_for_socket_match(l_lro, ip, tcph)) + continue; + /* Sock pair matched */ + *lro = l_lro; + + if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) { + DBG_PRINT(INFO_DBG, "%s:Out of order. expected " + "0x%x, actual 0x%x\n", __FUNCTION__, + (*lro)->tcp_next_seq, + ntohl(tcph->seq)); + + sp->mac_control.stats_info-> + sw_stat.outof_sequence_pkts++; + ret = 2; + break; + } + + if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len)) + ret = 1; /* Aggregate */ + else + ret = 2; /* Flush both */ + break; + } + } + + if (ret == 0) { + /* Before searching for available LRO objects, + * check if the pkt is L3/L4 aggregatable. If not + * don't create new LRO session. Just send this + * packet up. + */ + if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) { + return 5; + } + + for (i=0; i<MAX_LRO_SESSIONS; i++) { + lro_t *l_lro = &sp->lro0_n[i]; + if (!(l_lro->in_use)) { + *lro = l_lro; + ret = 3; /* Begin anew */ + break; + } + } + } + + if (ret == 0) { /* sessions exceeded */ + DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n", + __FUNCTION__); + *lro = NULL; + return ret; + } + + switch (ret) { + case 3: + initiate_new_session(*lro, buffer, ip, tcph, *tcp_len); + break; + case 2: + update_L3L4_header(sp, *lro); + break; + case 1: + aggregate_new_rx(*lro, ip, tcph, *tcp_len); + if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) { + update_L3L4_header(sp, *lro); + ret = 4; /* Flush the LRO */ + } + break; + default: + DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n", + __FUNCTION__); + break; + } + + return ret; +} + +static void clear_lro_session(lro_t *lro) +{ + static u16 lro_struct_size = sizeof(lro_t); + + memset(lro, 0, lro_struct_size); +} + +static void queue_rx_frame(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + + skb->protocol = eth_type_trans(skb, dev); +#ifdef CONFIG_S2IO_NAPI + netif_receive_skb(skb); +#else + netif_rx(skb); +#endif +} + +static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, + u32 tcp_len) +{ + struct sk_buff *tmp, *first = lro->parent; + + first->len += tcp_len; + first->data_len = lro->frags_len; + skb_pull(skb, (skb->len - tcp_len)); + if ((tmp = skb_shinfo(first)->frag_list)) { + while (tmp->next) + tmp = tmp->next; + tmp->next = skb; + } + else + skb_shinfo(first)->frag_list = skb; + sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++; + return; +} diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 852a6a8..0a0b5b2 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -64,7 +64,7 @@ typedef enum xena_max_outstanding_splits { #define INTR_DBG 4 /* Global variable that defines the present debug level of the driver. */ -int debug_level = ERR_DBG; /* Default level. */ +static int debug_level = ERR_DBG; /* DEBUG message print. */ #define DBG_PRINT(dbg_level, args...) if(!(debug_level<dbg_level)) printk(args) @@ -78,6 +78,13 @@ int debug_level = ERR_DBG; /* Default level. */ typedef struct { unsigned long long single_ecc_errs; unsigned long long double_ecc_errs; + /* LRO statistics */ + unsigned long long clubbed_frms_cnt; + unsigned long long sending_both; + unsigned long long outof_sequence_pkts; + unsigned long long flush_max_pkts; + unsigned long long sum_avg_pkts_aggregated; + unsigned long long num_aggregations; } swStat_t; /* The statistics block of Xena */ @@ -268,7 +275,7 @@ typedef struct stat_block { #define MAX_RX_RINGS 8 /* FIFO mappings for all possible number of fifos configured */ -int fifo_map[][MAX_TX_FIFOS] = { +static int fifo_map[][MAX_TX_FIFOS] = { {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 1, 1, 1, 2, 2}, @@ -680,6 +687,24 @@ struct msix_info_st { u64 data; }; +/* Data structure to represent a LRO session */ +typedef struct lro { + struct sk_buff *parent; + u8 *l2h; + struct iphdr *iph; + struct tcphdr *tcph; + u32 tcp_next_seq; + u32 tcp_ack; + int total_len; + int frags_len; + int sg_num; + int in_use; + u16 window; + u32 cur_tsval; + u32 cur_tsecr; + u8 saw_ts; +}lro_t; + /* Structure representing one instance of the NIC */ struct s2io_nic { int rxd_mode; @@ -784,6 +809,13 @@ struct s2io_nic { #define XFRAME_II_DEVICE 2 u8 device_type; +#define MAX_LRO_SESSIONS 32 + lro_t lro0_n[MAX_LRO_SESSIONS]; + unsigned long clubbed_frms_cnt; + unsigned long sending_both; + u8 lro; + u16 lro_max_aggr_per_sess; + #define INTA 0 #define MSI 1 #define MSI_X 2 @@ -911,18 +943,16 @@ static void tx_intr_handler(fifo_info_t *fifo_data); static void alarm_intr_handler(struct s2io_nic *sp); static int s2io_starter(void); -void s2io_closer(void); static void s2io_tx_watchdog(struct net_device *dev); static void s2io_tasklet(unsigned long dev_addr); static void s2io_set_multicast(struct net_device *dev); static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp); -void s2io_link(nic_t * sp, int link); -void s2io_reset(nic_t * sp); +static void s2io_link(nic_t * sp, int link); #if defined(CONFIG_S2IO_NAPI) static int s2io_poll(struct net_device *dev, int *budget); #endif static void s2io_init_pci(nic_t * sp); -int s2io_set_mac_addr(struct net_device *dev, u8 * addr); +static int s2io_set_mac_addr(struct net_device *dev, u8 * addr); static void s2io_alarm_handle(unsigned long data); static int s2io_enable_msi(nic_t *nic); static irqreturn_t s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs); @@ -930,14 +960,19 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs); static irqreturn_t s2io_msix_fifo_handle(int irq, void *dev_id, struct pt_regs *regs); -int s2io_enable_msi_x(nic_t *nic); static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs); static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag); static struct ethtool_ops netdev_ethtool_ops; static void s2io_set_link(unsigned long data); -int s2io_set_swapper(nic_t * sp); +static int s2io_set_swapper(nic_t * sp); static void s2io_card_down(nic_t *nic); static int s2io_card_up(nic_t *nic); -int get_xena_rev_id(struct pci_dev *pdev); -void restore_xmsi_data(nic_t *nic); +static int get_xena_rev_id(struct pci_dev *pdev); +static void restore_xmsi_data(nic_t *nic); + +static int s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, RxD_t *rxdp, nic_t *sp); +static void clear_lro_session(lro_t *lro); +static void queue_rx_frame(struct sk_buff *skb); +static void update_L3L4_header(nic_t *sp, lro_t *lro); +static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, u32 tcp_len); #endif /* _S2IO_H */ diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 7613947..66cf226 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -59,7 +59,7 @@ static char version[] = "sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)\n"; #ifdef SB1000_DEBUG static int sb1000_debug = SB1000_DEBUG; #else -static int sb1000_debug = 1; +static const int sb1000_debug = 1; #endif static const int SB1000_IO_EXTENT = 8; diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index aa4ca18..f2be9f8 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001,2002,2003 Broadcom Corporation + * Copyright (C) 2001,2002,2003,2004 Broadcom Corporation * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -43,6 +43,7 @@ #define SBMAC_ETH0_HWADDR "40:00:00:00:01:00" #define SBMAC_ETH1_HWADDR "40:00:00:00:01:01" #define SBMAC_ETH2_HWADDR "40:00:00:00:01:02" +#define SBMAC_ETH3_HWADDR "40:00:00:00:01:03" #endif @@ -57,7 +58,7 @@ static char version1[] __devinitdata = #define CONFIG_SBMAC_COALESCE -#define MAX_UNITS 3 /* More are supported, limit only on options */ +#define MAX_UNITS 4 /* More are supported, limit only on options */ /* Time in jiffies before concluding the transmitter is hung. */ #define TX_TIMEOUT (2*HZ) @@ -85,11 +86,11 @@ MODULE_PARM_DESC(noisy_mii, "MII status messages"); The media type is usually passed in 'options[]'. */ #ifdef MODULE -static int options[MAX_UNITS] = {-1, -1, -1}; +static int options[MAX_UNITS] = {-1, -1, -1, -1}; module_param_array(options, int, NULL, S_IRUGO); MODULE_PARM_DESC(options, "1-" __MODULE_STRING(MAX_UNITS)); -static int full_duplex[MAX_UNITS] = {-1, -1, -1}; +static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1}; module_param_array(full_duplex, int, NULL, S_IRUGO); MODULE_PARM_DESC(full_duplex, "1-" __MODULE_STRING(MAX_UNITS)); #endif @@ -105,13 +106,26 @@ MODULE_PARM_DESC(int_timeout, "Timeout value"); #endif #include <asm/sibyte/sb1250.h> -#include <asm/sibyte/sb1250_defs.h> +#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) +#include <asm/sibyte/bcm1480_regs.h> +#include <asm/sibyte/bcm1480_int.h> +#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X) #include <asm/sibyte/sb1250_regs.h> -#include <asm/sibyte/sb1250_mac.h> -#include <asm/sibyte/sb1250_dma.h> #include <asm/sibyte/sb1250_int.h> +#else +#error invalid SiByte MAC configuation +#endif #include <asm/sibyte/sb1250_scd.h> +#include <asm/sibyte/sb1250_mac.h> +#include <asm/sibyte/sb1250_dma.h> +#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) +#define UNIT_INT(n) (K_BCM1480_INT_MAC_0 + ((n) * 2)) +#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X) +#define UNIT_INT(n) (K_INT_MAC_0 + (n)) +#else +#error invalid SiByte MAC configuation +#endif /********************************************************************** * Simple types @@ -1476,10 +1490,10 @@ static void sbmac_channel_start(struct sbmac_softc *s) * and make sure that RD_THRSH + WR_THRSH <=128 for pass2 and above * Use a larger RD_THRSH for gigabit */ - if (periph_rev >= 2) - th_value = 64; - else + if (soc_type == K_SYS_SOC_TYPE_BCM1250 && periph_rev < 2) th_value = 28; + else + th_value = 64; fifo = V_MAC_TX_WR_THRSH(4) | /* Must be '4' or '8' */ ((s->sbm_speed == sbmac_speed_1000) @@ -1589,13 +1603,17 @@ static void sbmac_channel_start(struct sbmac_softc *s) * Turn on the rest of the bits in the enable register */ +#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) + __raw_writeq(M_MAC_RXDMA_EN0 | + M_MAC_TXDMA_EN0, s->sbm_macenable); +#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X) __raw_writeq(M_MAC_RXDMA_EN0 | M_MAC_TXDMA_EN0 | M_MAC_RX_ENABLE | M_MAC_TX_ENABLE, s->sbm_macenable); - - - +#else +#error invalid SiByte MAC configuation +#endif #ifdef CONFIG_SBMAC_COALESCE /* @@ -1786,11 +1804,12 @@ static void sbmac_set_iphdr_offset(struct sbmac_softc *sc) reg &= ~M_MAC_IPHDR_OFFSET | V_MAC_IPHDR_OFFSET(15); __raw_writeq(reg, sc->sbm_rxfilter); - /* read system identification to determine revision */ - if (periph_rev >= 2) { - sc->rx_hw_checksum = ENABLE; - } else { + /* BCM1250 pass1 didn't have hardware checksum. Everything + later does. */ + if (soc_type == K_SYS_SOC_TYPE_BCM1250 && periph_rev < 2) { sc->rx_hw_checksum = DISABLE; + } else { + sc->rx_hw_checksum = ENABLE; } } @@ -2220,7 +2239,7 @@ static void sbmac_setmulti(struct sbmac_softc *sc) -#if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) +#if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) || defined(SBMAC_ETH3_HWADDR) /********************************************************************** * SBMAC_PARSE_XDIGIT(str) * @@ -2792,7 +2811,7 @@ static int sbmac_close(struct net_device *dev) -#if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) +#if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) || defined(SBMAC_ETH3_HWADDR) static void sbmac_setup_hwaddr(int chan,char *addr) { @@ -2818,25 +2837,7 @@ sbmac_init_module(void) unsigned long port; int chip_max_units; - /* - * For bringup when not using the firmware, we can pre-fill - * the MAC addresses using the environment variables - * specified in this file (or maybe from the config file?) - */ -#ifdef SBMAC_ETH0_HWADDR - sbmac_setup_hwaddr(0,SBMAC_ETH0_HWADDR); -#endif -#ifdef SBMAC_ETH1_HWADDR - sbmac_setup_hwaddr(1,SBMAC_ETH1_HWADDR); -#endif -#ifdef SBMAC_ETH2_HWADDR - sbmac_setup_hwaddr(2,SBMAC_ETH2_HWADDR); -#endif - - /* - * Walk through the Ethernet controllers and find - * those who have their MAC addresses set. - */ + /* Set the number of available units based on the SOC type. */ switch (soc_type) { case K_SYS_SOC_TYPE_BCM1250: case K_SYS_SOC_TYPE_BCM1250_ALT: @@ -2848,6 +2849,10 @@ sbmac_init_module(void) case K_SYS_SOC_TYPE_BCM1250_ALT2: /* Hybrid */ chip_max_units = 2; break; + case K_SYS_SOC_TYPE_BCM1x55: + case K_SYS_SOC_TYPE_BCM1x80: + chip_max_units = 4; + break; default: chip_max_units = 0; break; @@ -2855,6 +2860,32 @@ sbmac_init_module(void) if (chip_max_units > MAX_UNITS) chip_max_units = MAX_UNITS; + /* + * For bringup when not using the firmware, we can pre-fill + * the MAC addresses using the environment variables + * specified in this file (or maybe from the config file?) + */ +#ifdef SBMAC_ETH0_HWADDR + if (chip_max_units > 0) + sbmac_setup_hwaddr(0,SBMAC_ETH0_HWADDR); +#endif +#ifdef SBMAC_ETH1_HWADDR + if (chip_max_units > 1) + sbmac_setup_hwaddr(1,SBMAC_ETH1_HWADDR); +#endif +#ifdef SBMAC_ETH2_HWADDR + if (chip_max_units > 2) + sbmac_setup_hwaddr(2,SBMAC_ETH2_HWADDR); +#endif +#ifdef SBMAC_ETH3_HWADDR + if (chip_max_units > 3) + sbmac_setup_hwaddr(3,SBMAC_ETH3_HWADDR); +#endif + + /* + * Walk through the Ethernet controllers and find + * those who have their MAC addresses set. + */ for (idx = 0; idx < chip_max_units; idx++) { /* @@ -2886,7 +2917,7 @@ sbmac_init_module(void) printk(KERN_DEBUG "sbmac: configuring MAC at %lx\n", port); - dev->irq = K_INT_MAC_0 + idx; + dev->irq = UNIT_INT(idx); dev->base_addr = port; dev->mem_end = 0; if (sbmac_init(dev, idx)) { diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c index 79dca39..bcef03f 100644 --- a/drivers/net/seeq8005.c +++ b/drivers/net/seeq8005.c @@ -46,6 +46,7 @@ static const char version[] = #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/io.h> @@ -699,7 +700,7 @@ static void hardware_send_packet(struct net_device * dev, char *buf, int length) int ioaddr = dev->base_addr; int status = inw(SEEQ_STATUS); int transmit_ptr = 0; - int tmp; + unsigned long tmp; if (net_debug>4) { printk("%s: send 0x%04x\n",dev->name,length); @@ -724,7 +725,7 @@ static void hardware_send_packet(struct net_device * dev, char *buf, int length) /* drain FIFO */ tmp = jiffies; - while ( (((status=inw(SEEQ_STATUS)) & SEEQSTAT_FIFO_EMPTY) == 0) && (jiffies - tmp < HZ)) + while ( (((status=inw(SEEQ_STATUS)) & SEEQSTAT_FIFO_EMPTY) == 0) && time_before(jiffies, tmp + HZ)) mb(); /* doit ! */ diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c index a4614df..f95a5b0 100644 --- a/drivers/net/sgiseeq.c +++ b/drivers/net/sgiseeq.c @@ -3,6 +3,9 @@ * * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) */ + +#undef DEBUG + #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -59,8 +62,6 @@ static char *sgiseeqstr = "SGI Seeq8003"; sp->tx_old + (SEEQ_TX_BUFFERS - 1) - sp->tx_new : \ sp->tx_old - sp->tx_new - 1) -#define DEBUG - struct sgiseeq_rx_desc { volatile struct hpc_dma_desc rdma; volatile signed int buf_vaddr; @@ -209,7 +210,7 @@ static int seeq_init_ring(struct net_device *dev) static struct sgiseeq_private *gpriv; static struct net_device *gdev; -void sgiseeq_dump_rings(void) +static void sgiseeq_dump_rings(void) { static int once; struct sgiseeq_rx_desc *r = gpriv->rx_desc; @@ -311,9 +312,9 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp struct sgiseeq_regs *sregs) { struct sgiseeq_rx_desc *rd; - struct sk_buff *skb = 0; + struct sk_buff *skb = NULL; unsigned char pkt_status; - unsigned char *pkt_pointer = 0; + unsigned char *pkt_pointer = NULL; int len = 0; unsigned int orig_end = PREV_RX(sp->rx_new); @@ -515,12 +516,6 @@ static inline int sgiseeq_reset(struct net_device *dev) return 0; } -void sgiseeq_my_reset(void) -{ - printk("RESET!\n"); - sgiseeq_reset(gdev); -} - static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sgiseeq_private *sp = netdev_priv(dev); diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index 221354e..88e2120 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -83,6 +83,7 @@ #include <linux/if_arp.h> #include <linux/init.h> #include <linux/if_shaper.h> +#include <linux/jiffies.h> #include <net/dst.h> #include <net/arp.h> @@ -168,7 +169,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) /* * Queue over time. Spill packet. */ - if(SHAPERCB(skb)->shapeclock-jiffies > SHAPER_LATENCY) { + if(time_after(SHAPERCB(skb)->shapeclock,jiffies + SHAPER_LATENCY)) { dev_kfree_skb(skb); shaper->stats.tx_dropped++; } else diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index ed4bc91..31dd3f0 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -366,7 +366,7 @@ static const u32 sis190_intr_mask = * Maximum number of multicast addresses to filter (vs. Rx-all-multicast). * The chips use a 64 element hash table based on the Ethernet CRC. */ -static int multicast_filter_limit = 32; +static const int multicast_filter_limit = 32; static void __mdio_cmd(void __iomem *ioaddr, u32 ctl) { diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 7a952fe..a1cb07c 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -100,7 +100,7 @@ enum { SIS_900 = 0, SIS_7016 }; -static char * card_names[] = { +static const char * card_names[] = { "SiS 900 PCI Fast Ethernet", "SiS 7016 PCI Fast Ethernet" }; @@ -115,7 +115,7 @@ MODULE_DEVICE_TABLE (pci, sis900_pci_tbl); static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex); -static struct mii_chip_info { +static const struct mii_chip_info { const char * name; u16 phy_id0; u16 phy_id1; @@ -400,7 +400,7 @@ static int __devinit sis900_probe(struct pci_dev *pci_dev, void *ring_space; long ioaddr; int i, ret; - char *card_name = card_names[pci_id->driver_data]; + const char *card_name = card_names[pci_id->driver_data]; const char *dev_name = pci_name(pci_dev); /* when built into the kernel, we only print version if device is found */ @@ -1275,7 +1275,7 @@ static void sis900_timer(unsigned long data) struct net_device *net_dev = (struct net_device *)data; struct sis900_private *sis_priv = net_dev->priv; struct mii_phy *mii_phy = sis_priv->mii; - static int next_tick = 5*HZ; + static const int next_tick = 5*HZ; u16 status; if (!sis_priv->autong_complete){ diff --git a/drivers/net/sk98lin/h/skaddr.h b/drivers/net/sk98lin/h/skaddr.h index 3a2ea4a..423ad06 100644 --- a/drivers/net/sk98lin/h/skaddr.h +++ b/drivers/net/sk98lin/h/skaddr.h @@ -236,18 +236,6 @@ extern int SkAddrMcClear( SK_U32 PortNumber, int Flags); -extern int SkAddrXmacMcClear( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber, - int Flags); - -extern int SkAddrGmacMcClear( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber, - int Flags); - extern int SkAddrMcAdd( SK_AC *pAC, SK_IOC IoC, @@ -255,35 +243,11 @@ extern int SkAddrMcAdd( SK_MAC_ADDR *pMc, int Flags); -extern int SkAddrXmacMcAdd( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber, - SK_MAC_ADDR *pMc, - int Flags); - -extern int SkAddrGmacMcAdd( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber, - SK_MAC_ADDR *pMc, - int Flags); - extern int SkAddrMcUpdate( SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber); -extern int SkAddrXmacMcUpdate( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber); - -extern int SkAddrGmacMcUpdate( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber); - extern int SkAddrOverride( SK_AC *pAC, SK_IOC IoC, @@ -297,18 +261,6 @@ extern int SkAddrPromiscuousChange( SK_U32 PortNumber, int NewPromMode); -extern int SkAddrXmacPromiscuousChange( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber, - int NewPromMode); - -extern int SkAddrGmacPromiscuousChange( - SK_AC *pAC, - SK_IOC IoC, - SK_U32 PortNumber, - int NewPromMode); - #ifndef SK_SLIM extern int SkAddrSwap( SK_AC *pAC, diff --git a/drivers/net/sk98lin/h/skcsum.h b/drivers/net/sk98lin/h/skcsum.h index 2b94adb..6e256bd 100644 --- a/drivers/net/sk98lin/h/skcsum.h +++ b/drivers/net/sk98lin/h/skcsum.h @@ -203,12 +203,6 @@ extern SKCS_STATUS SkCsGetReceiveInfo( unsigned Checksum2, int NetNumber); -extern void SkCsGetSendInfo( - SK_AC *pAc, - void *pIpHeader, - SKCS_PACKET_INFO *pPacketInfo, - int NetNumber); - extern void SkCsSetReceiveFlags( SK_AC *pAc, unsigned ReceiveFlags, diff --git a/drivers/net/sk98lin/h/skgeinit.h b/drivers/net/sk98lin/h/skgeinit.h index 184f47c..143e635 100644 --- a/drivers/net/sk98lin/h/skgeinit.h +++ b/drivers/net/sk98lin/h/skgeinit.h @@ -464,12 +464,6 @@ typedef struct s_GeInit { /* * public functions in skgeinit.c */ -extern void SkGePollRxD( - SK_AC *pAC, - SK_IOC IoC, - int Port, - SK_BOOL PollRxD); - extern void SkGePollTxD( SK_AC *pAC, SK_IOC IoC, @@ -522,10 +516,6 @@ extern void SkGeXmitLED( int Led, int Mode); -extern void SkGeInitRamIface( - SK_AC *pAC, - SK_IOC IoC); - extern int SkGeInitAssignRamToQueues( SK_AC *pAC, int ActivePort, @@ -549,11 +539,6 @@ extern void SkMacHardRst( SK_IOC IoC, int Port); -extern void SkMacClearRst( - SK_AC *pAC, - SK_IOC IoC, - int Port); - extern void SkXmInitMac( SK_AC *pAC, SK_IOC IoC, @@ -580,11 +565,6 @@ extern void SkMacFlushTxFifo( SK_IOC IoC, int Port); -extern void SkMacFlushRxFifo( - SK_AC *pAC, - SK_IOC IoC, - int Port); - extern void SkMacIrq( SK_AC *pAC, SK_IOC IoC, @@ -601,12 +581,6 @@ extern void SkMacAutoNegLipaPhy( int Port, SK_U16 IStatus); -extern void SkMacSetRxTxEn( - SK_AC *pAC, - SK_IOC IoC, - int Port, - int Para); - extern int SkMacRxTxEnable( SK_AC *pAC, SK_IOC IoC, @@ -659,16 +633,6 @@ extern void SkXmClrExactAddr( int StartNum, int StopNum); -extern void SkXmInitDupMd( - SK_AC *pAC, - SK_IOC IoC, - int Port); - -extern void SkXmInitPauseMd( - SK_AC *pAC, - SK_IOC IoC, - int Port); - extern void SkXmAutoNegLipaXmac( SK_AC *pAC, SK_IOC IoC, @@ -729,17 +693,6 @@ extern int SkGmCableDiagStatus( int Port, SK_BOOL StartTest); -extern int SkGmEnterLowPowerMode( - SK_AC *pAC, - SK_IOC IoC, - int Port, - SK_U8 Mode); - -extern int SkGmLeaveLowPowerMode( - SK_AC *pAC, - SK_IOC IoC, - int Port); - #ifdef SK_DIAG extern void SkGePhyRead( SK_AC *pAC, @@ -782,7 +735,6 @@ extern void SkXmSendCont( /* * public functions in skgeinit.c */ -extern void SkGePollRxD(); extern void SkGePollTxD(); extern void SkGeYellowLED(); extern int SkGeCfgSync(); @@ -792,7 +744,6 @@ extern int SkGeInit(); extern void SkGeDeInit(); extern int SkGeInitPort(); extern void SkGeXmitLED(); -extern void SkGeInitRamIface(); extern int SkGeInitAssignRamToQueues(); /* @@ -801,18 +752,15 @@ extern int SkGeInitAssignRamToQueues(); extern void SkMacRxTxDisable(); extern void SkMacSoftRst(); extern void SkMacHardRst(); -extern void SkMacClearRst(); extern void SkMacInitPhy(); extern int SkMacRxTxEnable(); extern void SkMacPromiscMode(); extern void SkMacHashing(); extern void SkMacIrqDisable(); extern void SkMacFlushTxFifo(); -extern void SkMacFlushRxFifo(); extern void SkMacIrq(); extern int SkMacAutoNegDone(); extern void SkMacAutoNegLipaPhy(); -extern void SkMacSetRxTxEn(); extern void SkXmInitMac(); extern void SkXmPhyRead(); extern void SkXmPhyWrite(); @@ -820,8 +768,6 @@ extern void SkGmInitMac(); extern void SkGmPhyRead(); extern void SkGmPhyWrite(); extern void SkXmClrExactAddr(); -extern void SkXmInitDupMd(); -extern void SkXmInitPauseMd(); extern void SkXmAutoNegLipaXmac(); extern int SkXmUpdateStats(); extern int SkGmUpdateStats(); @@ -832,8 +778,6 @@ extern int SkGmResetCounter(); extern int SkXmOverflowStatus(); extern int SkGmOverflowStatus(); extern int SkGmCableDiagStatus(); -extern int SkGmEnterLowPowerMode(); -extern int SkGmLeaveLowPowerMode(); #ifdef SK_DIAG extern void SkGePhyRead(); diff --git a/drivers/net/sk98lin/h/skgepnmi.h b/drivers/net/sk98lin/h/skgepnmi.h index 3b2773e..1ed214c 100644 --- a/drivers/net/sk98lin/h/skgepnmi.h +++ b/drivers/net/sk98lin/h/skgepnmi.h @@ -946,10 +946,6 @@ typedef struct s_PnmiData { * Function prototypes */ extern int SkPnmiInit(SK_AC *pAC, SK_IOC IoC, int Level); -extern int SkPnmiGetVar(SK_AC *pAC, SK_IOC IoC, SK_U32 Id, void* pBuf, - unsigned int* pLen, SK_U32 Instance, SK_U32 NetIndex); -extern int SkPnmiPreSetVar(SK_AC *pAC, SK_IOC IoC, SK_U32 Id, - void* pBuf, unsigned int *pLen, SK_U32 Instance, SK_U32 NetIndex); extern int SkPnmiSetVar(SK_AC *pAC, SK_IOC IoC, SK_U32 Id, void* pBuf, unsigned int *pLen, SK_U32 Instance, SK_U32 NetIndex); extern int SkPnmiGetStruct(SK_AC *pAC, SK_IOC IoC, void* pBuf, diff --git a/drivers/net/sk98lin/h/skgesirq.h b/drivers/net/sk98lin/h/skgesirq.h index b486bd9..3eec627 100644 --- a/drivers/net/sk98lin/h/skgesirq.h +++ b/drivers/net/sk98lin/h/skgesirq.h @@ -105,7 +105,6 @@ extern void SkGeSirqIsr(SK_AC *pAC, SK_IOC IoC, SK_U32 Istatus); extern int SkGeSirqEvent(SK_AC *pAC, SK_IOC IoC, SK_U32 Event, SK_EVPARA Para); -extern void SkHWLinkUp(SK_AC *pAC, SK_IOC IoC, int Port); extern void SkHWLinkDown(SK_AC *pAC, SK_IOC IoC, int Port); #endif /* _INC_SKGESIRQ_H_ */ diff --git a/drivers/net/sk98lin/h/ski2c.h b/drivers/net/sk98lin/h/ski2c.h index 598bb42..6a63f4a 100644 --- a/drivers/net/sk98lin/h/ski2c.h +++ b/drivers/net/sk98lin/h/ski2c.h @@ -162,9 +162,6 @@ typedef struct s_I2c { } SK_I2C; extern int SkI2cInit(SK_AC *pAC, SK_IOC IoC, int Level); -extern int SkI2cWrite(SK_AC *pAC, SK_IOC IoC, SK_U32 Data, int Dev, int Size, - int Reg, int Burst); -extern int SkI2cReadSensor(SK_AC *pAC, SK_IOC IoC, SK_SENSOR *pSen); #ifdef SK_DIAG extern SK_U32 SkI2cRead(SK_AC *pAC, SK_IOC IoC, int Dev, int Size, int Reg, int Burst); diff --git a/drivers/net/sk98lin/h/skvpd.h b/drivers/net/sk98lin/h/skvpd.h index daa9a8d..fdd9e48 100644 --- a/drivers/net/sk98lin/h/skvpd.h +++ b/drivers/net/sk98lin/h/skvpd.h @@ -183,14 +183,6 @@ extern SK_U32 VpdReadDWord( int addr); #endif /* SKDIAG */ -extern int VpdSetupPara( - SK_AC *pAC, - const char *key, - const char *buf, - int len, - int type, - int op); - extern SK_VPD_STATUS *VpdStat( SK_AC *pAC, SK_IOC IoC); @@ -227,11 +219,6 @@ extern int VpdUpdate( SK_AC *pAC, SK_IOC IoC); -extern void VpdErrLog( - SK_AC *pAC, - SK_IOC IoC, - char *msg); - #ifdef SKDIAG extern int VpdReadBlock( SK_AC *pAC, @@ -249,7 +236,6 @@ extern int VpdWriteBlock( #endif /* SKDIAG */ #else /* SK_KR_PROTO */ extern SK_U32 VpdReadDWord(); -extern int VpdSetupPara(); extern SK_VPD_STATUS *VpdStat(); extern int VpdKeys(); extern int VpdRead(); @@ -257,7 +243,6 @@ extern SK_BOOL VpdMayWrite(); extern int VpdWrite(); extern int VpdDelete(); extern int VpdUpdate(); -extern void VpdErrLog(); #endif /* SK_KR_PROTO */ #endif /* __INC_SKVPD_H_ */ diff --git a/drivers/net/sk98lin/skaddr.c b/drivers/net/sk98lin/skaddr.c index a7e25ed..6e6c56a 100644 --- a/drivers/net/sk98lin/skaddr.c +++ b/drivers/net/sk98lin/skaddr.c @@ -87,6 +87,21 @@ static const SK_U16 OnesHash[4] = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; static int Next0[SK_MAX_MACS] = {0}; #endif /* DEBUG */ +static int SkAddrGmacMcAdd(SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber, + SK_MAC_ADDR *pMc, int Flags); +static int SkAddrGmacMcClear(SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber, + int Flags); +static int SkAddrGmacMcUpdate(SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber); +static int SkAddrGmacPromiscuousChange(SK_AC *pAC, SK_IOC IoC, + SK_U32 PortNumber, int NewPromMode); +static int SkAddrXmacMcAdd(SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber, + SK_MAC_ADDR *pMc, int Flags); +static int SkAddrXmacMcClear(SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber, + int Flags); +static int SkAddrXmacMcUpdate(SK_AC *pAC, SK_IOC IoC, SK_U32 PortNumber); +static int SkAddrXmacPromiscuousChange(SK_AC *pAC, SK_IOC IoC, + SK_U32 PortNumber, int NewPromMode); + /* functions ******************************************************************/ /****************************************************************************** @@ -372,7 +387,7 @@ int Flags) /* permanent/non-perm, sw-only */ * SK_ADDR_SUCCESS * SK_ADDR_ILLEGAL_PORT */ -int SkAddrXmacMcClear( +static int SkAddrXmacMcClear( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber, /* Index of affected port */ @@ -429,7 +444,7 @@ int Flags) /* permanent/non-perm, sw-only */ * SK_ADDR_SUCCESS * SK_ADDR_ILLEGAL_PORT */ -int SkAddrGmacMcClear( +static int SkAddrGmacMcClear( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber, /* Index of affected port */ @@ -519,7 +534,7 @@ int Flags) /* permanent/non-perm, sw-only */ * Returns: * Hash value of multicast address. */ -SK_U32 SkXmacMcHash( +static SK_U32 SkXmacMcHash( unsigned char *pMc) /* Multicast address */ { SK_U32 Idx; @@ -557,7 +572,7 @@ unsigned char *pMc) /* Multicast address */ * Returns: * Hash value of multicast address. */ -SK_U32 SkGmacMcHash( +static SK_U32 SkGmacMcHash( unsigned char *pMc) /* Multicast address */ { SK_U32 Data; @@ -672,7 +687,7 @@ int Flags) /* permanent/non-permanent */ * SK_MC_ILLEGAL_ADDRESS * SK_MC_RLMT_OVERFLOW */ -int SkAddrXmacMcAdd( +static int SkAddrXmacMcAdd( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber, /* Port Number */ @@ -778,7 +793,7 @@ int Flags) /* permanent/non-permanent */ * SK_MC_FILTERING_INEXACT * SK_MC_ILLEGAL_ADDRESS */ -int SkAddrGmacMcAdd( +static int SkAddrGmacMcAdd( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber, /* Port Number */ @@ -937,7 +952,7 @@ SK_U32 PortNumber) /* Port Number */ * SK_MC_FILTERING_INEXACT * SK_ADDR_ILLEGAL_PORT */ -int SkAddrXmacMcUpdate( +static int SkAddrXmacMcUpdate( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber) /* Port Number */ @@ -1082,7 +1097,7 @@ SK_U32 PortNumber) /* Port Number */ * SK_MC_FILTERING_INEXACT * SK_ADDR_ILLEGAL_PORT */ -int SkAddrGmacMcUpdate( +static int SkAddrGmacMcUpdate( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber) /* Port Number */ @@ -1468,7 +1483,7 @@ int NewPromMode) /* new promiscuous mode */ * SK_ADDR_SUCCESS * SK_ADDR_ILLEGAL_PORT */ -int SkAddrXmacPromiscuousChange( +static int SkAddrXmacPromiscuousChange( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber, /* port whose promiscuous mode changes */ @@ -1585,7 +1600,7 @@ int NewPromMode) /* new promiscuous mode */ * SK_ADDR_SUCCESS * SK_ADDR_ILLEGAL_PORT */ -int SkAddrGmacPromiscuousChange( +static int SkAddrGmacPromiscuousChange( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* I/O context */ SK_U32 PortNumber, /* port whose promiscuous mode changes */ diff --git a/drivers/net/sk98lin/skgeinit.c b/drivers/net/sk98lin/skgeinit.c index 6cb49dd..67f1d6a 100644 --- a/drivers/net/sk98lin/skgeinit.c +++ b/drivers/net/sk98lin/skgeinit.c @@ -59,34 +59,6 @@ static struct s_Config OemConfig = { /****************************************************************************** * - * SkGePollRxD() - Enable / Disable Descriptor Polling of RxD Ring - * - * Description: - * Enable or disable the descriptor polling of the receive descriptor - * ring (RxD) for port 'Port'. - * The new configuration is *not* saved over any SkGeStopPort() and - * SkGeInitPort() calls. - * - * Returns: - * nothing - */ -void SkGePollRxD( -SK_AC *pAC, /* adapter context */ -SK_IOC IoC, /* IO context */ -int Port, /* Port Index (MAC_1 + n) */ -SK_BOOL PollRxD) /* SK_TRUE (enable pol.), SK_FALSE (disable pol.) */ -{ - SK_GEPORT *pPrt; - - pPrt = &pAC->GIni.GP[Port]; - - SK_OUT32(IoC, Q_ADDR(pPrt->PRxQOff, Q_CSR), (PollRxD) ? - CSR_ENA_POL : CSR_DIS_POL); -} /* SkGePollRxD */ - - -/****************************************************************************** - * * SkGePollTxD() - Enable / Disable Descriptor Polling of TxD Rings * * Description: @@ -952,7 +924,7 @@ int Port) /* Port Index (MAC_1 + n) */ * Returns: * nothing */ -void SkGeInitRamIface( +static void SkGeInitRamIface( SK_AC *pAC, /* adapter context */ SK_IOC IoC) /* IO context */ { @@ -1409,83 +1381,6 @@ SK_IOC IoC) /* IO context */ } /* SkGeInit0*/ -#ifdef SK_PCI_RESET - -/****************************************************************************** - * - * SkGePciReset() - Reset PCI interface - * - * Description: - * o Read PCI configuration. - * o Change power state to 3. - * o Change power state to 0. - * o Restore PCI configuration. - * - * Returns: - * 0: Success. - * 1: Power state could not be changed to 3. - */ -static int SkGePciReset( -SK_AC *pAC, /* adapter context */ -SK_IOC IoC) /* IO context */ -{ - int i; - SK_U16 PmCtlSts; - SK_U32 Bp1; - SK_U32 Bp2; - SK_U16 PciCmd; - SK_U8 Cls; - SK_U8 Lat; - SK_U8 ConfigSpace[PCI_CFG_SIZE]; - - /* - * Note: Switching to D3 state is like a software reset. - * Switching from D3 to D0 is a hardware reset. - * We have to save and restore the configuration space. - */ - for (i = 0; i < PCI_CFG_SIZE; i++) { - SkPciReadCfgDWord(pAC, i*4, &ConfigSpace[i]); - } - - /* We know the RAM Interface Arbiter is enabled. */ - SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, PCI_PM_STATE_D3); - SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts); - - if ((PmCtlSts & PCI_PM_STATE_MSK) != PCI_PM_STATE_D3) { - return(1); - } - - /* Return to D0 state. */ - SkPciWriteCfgWord(pAC, PCI_PM_CTL_STS, PCI_PM_STATE_D0); - - /* Check for D0 state. */ - SkPciReadCfgWord(pAC, PCI_PM_CTL_STS, &PmCtlSts); - - if ((PmCtlSts & PCI_PM_STATE_MSK) != PCI_PM_STATE_D0) { - return(1); - } - - /* Check PCI Config Registers. */ - SkPciReadCfgWord(pAC, PCI_COMMAND, &PciCmd); - SkPciReadCfgByte(pAC, PCI_CACHE_LSZ, &Cls); - SkPciReadCfgDWord(pAC, PCI_BASE_1ST, &Bp1); - SkPciReadCfgDWord(pAC, PCI_BASE_2ND, &Bp2); - SkPciReadCfgByte(pAC, PCI_LAT_TIM, &Lat); - - if (PciCmd != 0 || Cls != (SK_U8)0 || Lat != (SK_U8)0 || - (Bp1 & 0xfffffff0L) != 0 || Bp2 != 1) { - return(1); - } - - /* Restore PCI Config Space. */ - for (i = 0; i < PCI_CFG_SIZE; i++) { - SkPciWriteCfgDWord(pAC, i*4, ConfigSpace[i]); - } - - return(0); -} /* SkGePciReset */ - -#endif /* SK_PCI_RESET */ /****************************************************************************** * @@ -1524,10 +1419,6 @@ SK_IOC IoC) /* IO context */ /* save CLK_RUN bits (YUKON-Lite) */ SK_IN16(IoC, B0_CTST, &CtrlStat); -#ifdef SK_PCI_RESET - (void)SkGePciReset(pAC, IoC); -#endif /* SK_PCI_RESET */ - /* do the SW-reset */ SK_OUT8(IoC, B0_CTST, CS_RST_SET); @@ -1991,11 +1882,6 @@ SK_IOC IoC) /* IO context */ int i; SK_U16 Word; -#ifdef SK_PHY_LP_MODE - SK_U8 Byte; - SK_U16 PmCtlSts; -#endif /* SK_PHY_LP_MODE */ - #if (!defined(SK_SLIM) && !defined(VCPU)) /* ensure I2C is ready */ SkI2cWaitIrq(pAC, IoC); @@ -2010,38 +1896,6 @@ SK_IOC IoC) /* IO context */ } } -#ifdef SK_PHY_LP_MODE - /* - * for power saving purposes within mobile environments - * we set the PHY to coma mode and switch to D3 power state. - */ - if (pAC->GIni.GIYukonLite && - pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) { - - /* for all ports switch PHY to coma mode */ - for (i = 0; i < pAC->GIni.GIMacsFound; i++) { - - SkGmEnterLowPowerMode(pAC, IoC, i, PHY_PM_DEEP_SLEEP); - } - - if (pAC->GIni.GIVauxAvail) { - /* switch power to VAUX */ - Byte = PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_ON | PC_VCC_OFF; - - SK_OUT8(IoC, B0_POWER_CTRL, Byte); - } - - /* switch to D3 state */ - SK_IN16(IoC, PCI_C(PCI_PM_CTL_STS), &PmCtlSts); - - PmCtlSts |= PCI_PM_STATE_D3; - - SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); - - SK_OUT16(IoC, PCI_C(PCI_PM_CTL_STS), PmCtlSts); - } -#endif /* SK_PHY_LP_MODE */ - /* Reset all bits in the PCI STATUS register */ /* * Note: PCI Cfg cycles cannot be used, because they are not diff --git a/drivers/net/sk98lin/skgemib.c b/drivers/net/sk98lin/skgemib.c index 2991bc8..0a6f67a 100644 --- a/drivers/net/sk98lin/skgemib.c +++ b/drivers/net/sk98lin/skgemib.c @@ -871,13 +871,6 @@ PNMI_STATIC const SK_PNMI_TAB_ENTRY IdTable[] = { sizeof(SK_PNMI_CONF), SK_PNMI_OFF(Conf) + SK_PNMI_CNF_OFF(ConfPhyType), SK_PNMI_RO, MacPrivateConf, 0}, -#ifdef SK_PHY_LP_MODE - {OID_SKGE_PHY_LP_MODE, - SK_PNMI_MAC_ENTRIES, - sizeof(SK_PNMI_CONF), - SK_PNMI_OFF(Conf) + SK_PNMI_CNF_OFF(ConfPhyMode), - SK_PNMI_RW, MacPrivateConf, 0}, -#endif {OID_SKGE_LINK_CAP, SK_PNMI_MAC_ENTRIES, sizeof(SK_PNMI_CONF), diff --git a/drivers/net/sk98lin/skgepnmi.c b/drivers/net/sk98lin/skgepnmi.c index a386172..b36dd9a 100644 --- a/drivers/net/sk98lin/skgepnmi.c +++ b/drivers/net/sk98lin/skgepnmi.c @@ -56,10 +56,6 @@ static const char SysKonnectFileId[] = * Public Function prototypes */ int SkPnmiInit(SK_AC *pAC, SK_IOC IoC, int level); -int SkPnmiGetVar(SK_AC *pAC, SK_IOC IoC, SK_U32 Id, void *pBuf, - unsigned int *pLen, SK_U32 Instance, SK_U32 NetIndex); -int SkPnmiPreSetVar(SK_AC *pAC, SK_IOC IoC, SK_U32 Id, void *pBuf, - unsigned int *pLen, SK_U32 Instance, SK_U32 NetIndex); int SkPnmiSetVar(SK_AC *pAC, SK_IOC IoC, SK_U32 Id, void *pBuf, unsigned int *pLen, SK_U32 Instance, SK_U32 NetIndex); int SkPnmiGetStruct(SK_AC *pAC, SK_IOC IoC, void *pBuf, @@ -587,7 +583,7 @@ int Level) /* Initialization level */ * exist (e.g. port instance 3 on a two port * adapter. */ -int SkPnmiGetVar( +static int SkPnmiGetVar( SK_AC *pAC, /* Pointer to adapter context */ SK_IOC IoC, /* IO context handle */ SK_U32 Id, /* Object ID that is to be processed */ @@ -629,7 +625,7 @@ SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ * exist (e.g. port instance 3 on a two port * adapter. */ -int SkPnmiPreSetVar( +static int SkPnmiPreSetVar( SK_AC *pAC, /* Pointer to adapter context */ SK_IOC IoC, /* IO context handle */ SK_U32 Id, /* Object ID that is to be processed */ @@ -5062,9 +5058,6 @@ SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ case OID_SKGE_SPEED_CAP: case OID_SKGE_SPEED_MODE: case OID_SKGE_SPEED_STATUS: -#ifdef SK_PHY_LP_MODE - case OID_SKGE_PHY_LP_MODE: -#endif if (*pLen < (Limit - LogPortIndex) * sizeof(SK_U8)) { *pLen = (Limit - LogPortIndex) * sizeof(SK_U8); @@ -5140,28 +5133,6 @@ SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ Offset += sizeof(SK_U32); break; -#ifdef SK_PHY_LP_MODE - case OID_SKGE_PHY_LP_MODE: - if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ - if (LogPortIndex == 0) { - continue; - } - else { - /* Get value for physical ports */ - PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); - Val8 = (SK_U8) pAC->GIni.GP[PhysPortIndex].PPhyPowerState; - *pBufPtr = Val8; - } - } - else { /* DualNetMode */ - - Val8 = (SK_U8) pAC->GIni.GP[PhysPortIndex].PPhyPowerState; - *pBufPtr = Val8; - } - Offset += sizeof(SK_U8); - break; -#endif - case OID_SKGE_LINK_CAP: if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ if (LogPortIndex == 0) { @@ -5478,16 +5449,6 @@ SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ } break; -#ifdef SK_PHY_LP_MODE - case OID_SKGE_PHY_LP_MODE: - if (*pLen < Limit - LogPortIndex) { - - *pLen = Limit - LogPortIndex; - return (SK_PNMI_ERR_TOO_SHORT); - } - break; -#endif - case OID_SKGE_MTU: if (*pLen < sizeof(SK_U32)) { @@ -5845,116 +5806,6 @@ SK_U32 NetIndex) /* NetIndex (0..n), in single net mode always zero */ Offset += sizeof(SK_U32); break; -#ifdef SK_PHY_LP_MODE - case OID_SKGE_PHY_LP_MODE: - /* The preset ends here */ - if (Action == SK_PNMI_PRESET) { - - return (SK_PNMI_ERR_OK); - } - - if (!pAC->Pnmi.DualNetActiveFlag) { /* SingleNetMode */ - if (LogPortIndex == 0) { - Offset = 0; - continue; - } - else { - /* Set value for physical ports */ - PhysPortIndex = SK_PNMI_PORT_LOG2PHYS(pAC, LogPortIndex); - - switch (*(pBuf + Offset)) { - case 0: - /* If LowPowerMode is active, we can leave it. */ - if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { - - Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex); - - if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState < 3) { - - SkDrvInitAdapter(pAC); - } - break; - } - else { - *pLen = 0; - return (SK_PNMI_ERR_GENERAL); - } - case 1: - case 2: - case 3: - case 4: - /* If no LowPowerMode is active, we can enter it. */ - if (!pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { - - if ((*(pBuf + Offset)) < 3) { - - SkDrvDeInitAdapter(pAC); - } - - Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf); - break; - } - else { - *pLen = 0; - return (SK_PNMI_ERR_GENERAL); - } - default: - *pLen = 0; - return (SK_PNMI_ERR_BAD_VALUE); - } - } - } - else { /* DualNetMode */ - - switch (*(pBuf + Offset)) { - case 0: - /* If we are in a LowPowerMode, we can leave it. */ - if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { - - Val32 = SkGmLeaveLowPowerMode(pAC, IoC, PhysPortIndex); - - if (pAC->GIni.GP[PhysPortIndex].PPhyPowerState < 3) { - - SkDrvInitAdapter(pAC); - } - break; - } - else { - *pLen = 0; - return (SK_PNMI_ERR_GENERAL); - } - - case 1: - case 2: - case 3: - case 4: - /* If we are not already in LowPowerMode, we can enter it. */ - if (!pAC->GIni.GP[PhysPortIndex].PPhyPowerState) { - - if ((*(pBuf + Offset)) < 3) { - - SkDrvDeInitAdapter(pAC); - } - else { - - Val32 = SkGmEnterLowPowerMode(pAC, IoC, PhysPortIndex, *pBuf); - } - break; - } - else { - *pLen = 0; - return (SK_PNMI_ERR_GENERAL); - } - - default: - *pLen = 0; - return (SK_PNMI_ERR_BAD_VALUE); - } - } - Offset += sizeof(SK_U8); - break; -#endif - default: SK_DBG_MSG(pAC, SK_DBGMOD_PNMI, SK_DBGCAT_ERR, ("MacPrivateConf: Unknown OID should be handled before set")); diff --git a/drivers/net/sk98lin/skgesirq.c b/drivers/net/sk98lin/skgesirq.c index 87520f0..ab66d80 100644 --- a/drivers/net/sk98lin/skgesirq.c +++ b/drivers/net/sk98lin/skgesirq.c @@ -265,7 +265,7 @@ int Port) /* Port Index (MAC_1 + n) */ * * Returns: N/A */ -void SkHWLinkUp( +static void SkHWLinkUp( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* IO context */ int Port) /* Port Index (MAC_1 + n) */ @@ -612,14 +612,6 @@ SK_U32 Istatus) /* Interrupt status word */ * we ignore those */ pPrt->HalfDupTimerActive = SK_TRUE; -#ifdef XXX - Len = sizeof(SK_U64); - SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets, - &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, 0), - pAC->Rlmt.Port[0].Net->NetNumber); - - pPrt->LastOctets = Octets; -#endif /* XXX */ /* Snap statistic counters */ (void)SkXmUpdateStats(pAC, IoC, 0); @@ -653,14 +645,6 @@ SK_U32 Istatus) /* Interrupt status word */ pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOHALF) && !pPrt->HalfDupTimerActive) { pPrt->HalfDupTimerActive = SK_TRUE; -#ifdef XXX - Len = sizeof(SK_U64); - SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets, - &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, 1), - pAC->Rlmt.Port[1].Net->NetNumber); - - pPrt->LastOctets = Octets; -#endif /* XXX */ /* Snap statistic counters */ (void)SkXmUpdateStats(pAC, IoC, 1); @@ -2085,12 +2069,6 @@ SK_EVPARA Para) /* Event specific Parameter */ pPrt->HalfDupTimerActive = SK_FALSE; if (pPrt->PLinkModeStatus == SK_LMODE_STAT_HALF || pPrt->PLinkModeStatus == SK_LMODE_STAT_AUTOHALF) { -#ifdef XXX - Len = sizeof(SK_U64); - SkPnmiGetVar(pAC, IoC, OID_SKGE_STAT_TX_OCTETS, (char *)&Octets, - &Len, (SK_U32)SK_PNMI_PORT_PHYS2INST(pAC, Port), - pAC->Rlmt.Port[Port].Net->NetNumber); -#endif /* XXX */ /* Snap statistic counters */ (void)SkXmUpdateStats(pAC, IoC, Port); diff --git a/drivers/net/sk98lin/ski2c.c b/drivers/net/sk98lin/ski2c.c index 075a046..79bf57c 100644 --- a/drivers/net/sk98lin/ski2c.c +++ b/drivers/net/sk98lin/ski2c.c @@ -396,7 +396,7 @@ int Rw) /* Read / Write Flag */ * 1: error, transfer does not complete, I2C transfer * killed, wait loop terminated. */ -int SkI2cWait( +static int SkI2cWait( SK_AC *pAC, /* Adapter Context */ SK_IOC IoC, /* I/O Context */ int Event) /* complete event to wait for (I2C_READ or I2C_WRITE) */ @@ -481,7 +481,7 @@ SK_IOC IoC) /* I/O Context */ * returns 0: success * 1: error */ -int SkI2cWrite( +static int SkI2cWrite( SK_AC *pAC, /* Adapter Context */ SK_IOC IoC, /* I/O Context */ SK_U32 I2cData, /* I2C Data to write */ @@ -538,7 +538,7 @@ int I2cBurst) /* I2C Burst Flag */ * 1 if the read is completed * 0 if the read must be continued (I2C Bus still allocated) */ -int SkI2cReadSensor( +static int SkI2cReadSensor( SK_AC *pAC, /* Adapter Context */ SK_IOC IoC, /* I/O Context */ SK_SENSOR *pSen) /* Sensor to be read */ diff --git a/drivers/net/sk98lin/sklm80.c b/drivers/net/sk98lin/sklm80.c index 68292d1..a204f5b 100644 --- a/drivers/net/sk98lin/sklm80.c +++ b/drivers/net/sk98lin/sklm80.c @@ -34,79 +34,7 @@ static const char SysKonnectFileId[] = #include "h/lm80.h" #include "h/skdrv2nd.h" /* Adapter Control- and Driver specific Def. */ -#ifdef SK_DIAG -#define BREAK_OR_WAIT(pAC,IoC,Event) SkI2cWait(pAC,IoC,Event) -#else /* nSK_DIAG */ #define BREAK_OR_WAIT(pAC,IoC,Event) break -#endif /* nSK_DIAG */ - -#ifdef SK_DIAG -/* - * read the register 'Reg' from the device 'Dev' - * - * return read error -1 - * success the read value - */ -int SkLm80RcvReg( -SK_IOC IoC, /* Adapter Context */ -int Dev, /* I2C device address */ -int Reg) /* register to read */ -{ - int Val = 0; - int TempExt; - - /* Signal device number */ - if (SkI2cSndDev(IoC, Dev, I2C_WRITE)) { - return(-1); - } - - if (SkI2cSndByte(IoC, Reg)) { - return(-1); - } - - /* repeat start */ - if (SkI2cSndDev(IoC, Dev, I2C_READ)) { - return(-1); - } - - switch (Reg) { - case LM80_TEMP_IN: - Val = (int)SkI2cRcvByte(IoC, 1); - - /* First: correct the value: it might be negative */ - if ((Val & 0x80) != 0) { - /* Value is negative */ - Val = Val - 256; - } - Val = Val * SK_LM80_TEMP_LSB; - SkI2cStop(IoC); - - TempExt = (int)SkLm80RcvReg(IoC, LM80_ADDR, LM80_TEMP_CTRL); - - if (Val > 0) { - Val += ((TempExt >> 7) * SK_LM80_TEMPEXT_LSB); - } - else { - Val -= ((TempExt >> 7) * SK_LM80_TEMPEXT_LSB); - } - return(Val); - break; - case LM80_VT0_IN: - case LM80_VT1_IN: - case LM80_VT2_IN: - case LM80_VT3_IN: - Val = (int)SkI2cRcvByte(IoC, 1) * SK_LM80_VT_LSB; - break; - - default: - Val = (int)SkI2cRcvByte(IoC, 1); - break; - } - - SkI2cStop(IoC); - return(Val); -} -#endif /* SK_DIAG */ /* * read a sensors value (LM80 specific) diff --git a/drivers/net/sk98lin/skrlmt.c b/drivers/net/sk98lin/skrlmt.c index 9ea11ab..be8d1cc 100644 --- a/drivers/net/sk98lin/skrlmt.c +++ b/drivers/net/sk98lin/skrlmt.c @@ -282,7 +282,6 @@ typedef struct s_SpTreeRlmtPacket { SK_MAC_ADDR SkRlmtMcAddr = {{0x01, 0x00, 0x5A, 0x52, 0x4C, 0x4D}}; SK_MAC_ADDR BridgeMcAddr = {{0x01, 0x80, 0xC2, 0x00, 0x00, 0x00}}; -SK_MAC_ADDR BcAddr = {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; /* local variables ************************************************************/ diff --git a/drivers/net/sk98lin/skvpd.c b/drivers/net/sk98lin/skvpd.c index eb3c898..1778605 100644 --- a/drivers/net/sk98lin/skvpd.c +++ b/drivers/net/sk98lin/skvpd.c @@ -132,65 +132,6 @@ int addr) /* VPD address */ #endif /* SKDIAG */ -#if 0 - -/* - Write the dword 'data' at address 'addr' into the VPD EEPROM, and - verify that the data is written. - - Needed Time: - -. MIN MAX -. ------------------------------------------------------------------- -. write 1.8 ms 3.6 ms -. internal write cyles 0.7 ms 7.0 ms -. ------------------------------------------------------------------- -. over all program time 2.5 ms 10.6 ms -. read 1.3 ms 2.6 ms -. ------------------------------------------------------------------- -. over all 3.8 ms 13.2 ms -. - - - Returns 0: success - 1: error, I2C transfer does not terminate - 2: error, data verify error - - */ -static int VpdWriteDWord( -SK_AC *pAC, /* pAC pointer */ -SK_IOC IoC, /* IO Context */ -int addr, /* VPD address */ -SK_U32 data) /* VPD data to write */ -{ - /* start VPD write */ - /* Don't swap here, it's a data stream of bytes */ - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_CTRL, - ("VPD write dword at addr 0x%x, data = 0x%x\n",addr,data)); - VPD_OUT32(pAC, IoC, PCI_VPD_DAT_REG, (SK_U32)data); - /* But do it here */ - addr |= VPD_WRITE; - - VPD_OUT16(pAC, IoC, PCI_VPD_ADR_REG, (SK_U16)(addr | VPD_WRITE)); - - /* this may take up to 10,6 ms */ - if (VpdWait(pAC, IoC, VPD_WRITE)) { - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, - ("Write Timed Out\n")); - return(1); - }; - - /* verify data */ - if (VpdReadDWord(pAC, IoC, addr) != data) { - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, - ("Data Verify Error\n")); - return(2); - } - return(0); -} /* VpdWriteDWord */ - -#endif /* 0 */ - /* * Read one Stream of 'len' bytes of VPD data, starting at 'addr' from * or to the I2C EEPROM. @@ -728,7 +669,7 @@ char *etp) /* end pointer input position */ * 6: fatal VPD error * */ -int VpdSetupPara( +static int VpdSetupPara( SK_AC *pAC, /* common data base */ const char *key, /* keyword to insert */ const char *buf, /* buffer with the keyword value */ @@ -1148,50 +1089,3 @@ SK_IOC IoC) /* IO Context */ return(0); } - - -/* - * Read the contents of the VPD EEPROM and copy it to the VPD buffer - * if not already done. If the keyword "VF" is not present it will be - * created and the error log message will be stored to this keyword. - * If "VF" is not present the error log message will be stored to the - * keyword "VL". "VL" will created or overwritten if "VF" is present. - * The VPD read/write area is saved to the VPD EEPROM. - * - * returns nothing, errors will be ignored. - */ -void VpdErrLog( -SK_AC *pAC, /* common data base */ -SK_IOC IoC, /* IO Context */ -char *msg) /* error log message */ -{ - SK_VPD_PARA *v, vf; /* VF */ - int len; - - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_TX, - ("VPD error log msg %s\n", msg)); - if ((pAC->vpd.v.vpd_status & VPD_VALID) == 0) { - if (VpdInit(pAC, IoC) != 0) { - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR, - ("VPD init error\n")); - return; - } - } - - len = strlen(msg); - if (len > VPD_MAX_LEN) { - /* cut it */ - len = VPD_MAX_LEN; - } - if ((v = vpd_find_para(pAC, VPD_VF, &vf)) != NULL) { - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_TX, ("overwrite VL\n")); - (void)VpdSetupPara(pAC, VPD_VL, msg, len, VPD_RW_KEY, OWR_KEY); - } - else { - SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_TX, ("write VF\n")); - (void)VpdSetupPara(pAC, VPD_VF, msg, len, VPD_RW_KEY, ADD_KEY); - } - - (void)VpdUpdate(pAC, IoC); -} - diff --git a/drivers/net/sk98lin/skxmac2.c b/drivers/net/sk98lin/skxmac2.c index 42d2d96..b4e7502 100644 --- a/drivers/net/sk98lin/skxmac2.c +++ b/drivers/net/sk98lin/skxmac2.c @@ -41,13 +41,13 @@ static const char SysKonnectFileId[] = #endif #ifdef GENESIS -BCOM_HACK BcomRegA1Hack[] = { +static BCOM_HACK BcomRegA1Hack[] = { { 0x18, 0x0c20 }, { 0x17, 0x0012 }, { 0x15, 0x1104 }, { 0x17, 0x0013 }, { 0x15, 0x0404 }, { 0x17, 0x8006 }, { 0x15, 0x0132 }, { 0x17, 0x8006 }, { 0x15, 0x0232 }, { 0x17, 0x800D }, { 0x15, 0x000F }, { 0x18, 0x0420 }, { 0, 0 } }; -BCOM_HACK BcomRegC0Hack[] = { +static BCOM_HACK BcomRegC0Hack[] = { { 0x18, 0x0c20 }, { 0x17, 0x0012 }, { 0x15, 0x1204 }, { 0x17, 0x0013 }, { 0x15, 0x0A04 }, { 0x18, 0x0420 }, { 0, 0 } @@ -790,7 +790,7 @@ int Port) /* Port Index (MAC_1 + n) */ * Returns: * nothing */ -void SkMacFlushRxFifo( +static void SkMacFlushRxFifo( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* IO context */ int Port) /* Port Index (MAC_1 + n) */ @@ -1231,38 +1231,6 @@ int Port) /* Port Index (MAC_1 + n) */ } /* SkMacHardRst */ -/****************************************************************************** - * - * SkMacClearRst() - Clear the MAC reset - * - * Description: calls a clear MAC reset routine dep. on board type - * - * Returns: - * nothing - */ -void SkMacClearRst( -SK_AC *pAC, /* adapter context */ -SK_IOC IoC, /* IO context */ -int Port) /* Port Index (MAC_1 + n) */ -{ - -#ifdef GENESIS - if (pAC->GIni.GIGenesis) { - - SkXmClearRst(pAC, IoC, Port); - } -#endif /* GENESIS */ - -#ifdef YUKON - if (pAC->GIni.GIYukon) { - - SkGmClearRst(pAC, IoC, Port); - } -#endif /* YUKON */ - -} /* SkMacClearRst */ - - #ifdef GENESIS /****************************************************************************** * @@ -1713,7 +1681,7 @@ int Port) /* Port Index (MAC_1 + n) */ * Returns: * nothing */ -void SkXmInitDupMd( +static void SkXmInitDupMd( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* IO context */ int Port) /* Port Index (MAC_1 + n) */ @@ -1761,7 +1729,7 @@ int Port) /* Port Index (MAC_1 + n) */ * Returns: * nothing */ -void SkXmInitPauseMd( +static void SkXmInitPauseMd( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* IO context */ int Port) /* Port Index (MAC_1 + n) */ @@ -2076,283 +2044,7 @@ SK_BOOL DoLoop) /* Should a Phy LoopBack be set-up? */ } /* SkXmInitPhyBcom */ #endif /* GENESIS */ - #ifdef YUKON -#ifndef SK_SLIM -/****************************************************************************** - * - * SkGmEnterLowPowerMode() - * - * Description: - * This function sets the Marvell Alaska PHY to the low power mode - * given by parameter mode. - * The following low power modes are available: - * - * - Coma Mode (Deep Sleep): - * Power consumption: ~15 - 30 mW - * The PHY cannot wake up on its own. - * - * - IEEE 22.2.4.1.5 compatible power down mode - * Power consumption: ~240 mW - * The PHY cannot wake up on its own. - * - * - energy detect mode - * Power consumption: ~160 mW - * The PHY can wake up on its own by detecting activity - * on the CAT 5 cable. - * - * - energy detect plus mode - * Power consumption: ~150 mW - * The PHY can wake up on its own by detecting activity - * on the CAT 5 cable. - * Connected devices can be woken up by sending normal link - * pulses every one second. - * - * Note: - * - * Returns: - * 0: ok - * 1: error - */ -int SkGmEnterLowPowerMode( -SK_AC *pAC, /* adapter context */ -SK_IOC IoC, /* IO context */ -int Port, /* Port Index (e.g. MAC_1) */ -SK_U8 Mode) /* low power mode */ -{ - SK_U16 Word; - SK_U32 DWord; - SK_U8 LastMode; - int Ret = 0; - - if (pAC->GIni.GIYukonLite && - pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) { - - /* save current power mode */ - LastMode = pAC->GIni.GP[Port].PPhyPowerState; - pAC->GIni.GP[Port].PPhyPowerState = Mode; - - switch (Mode) { - /* coma mode (deep sleep) */ - case PHY_PM_DEEP_SLEEP: - /* setup General Purpose Control Register */ - GM_OUT16(IoC, 0, GM_GP_CTRL, GM_GPCR_FL_PASS | - GM_GPCR_SPEED_100 | GM_GPCR_AU_ALL_DIS); - - /* apply COMA mode workaround */ - SkGmPhyWrite(pAC, IoC, Port, 29, 0x001f); - SkGmPhyWrite(pAC, IoC, Port, 30, 0xfff3); - - SK_IN32(IoC, PCI_C(PCI_OUR_REG_1), &DWord); - - SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); - - /* Set PHY to Coma Mode */ - SK_OUT32(IoC, PCI_C(PCI_OUR_REG_1), DWord | PCI_PHY_COMA); - - SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); - - break; - - /* IEEE 22.2.4.1.5 compatible power down mode */ - case PHY_PM_IEEE_POWER_DOWN: - /* - * - disable MAC 125 MHz clock - * - allow MAC power down - */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); - Word |= PHY_M_PC_DIS_125CLK; - Word &= ~PHY_M_PC_MAC_POW_UP; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); - - /* - * register changes must be followed by a software - * reset to take effect - */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); - Word |= PHY_CT_RESET; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); - - /* switch IEEE compatible power down mode on */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); - Word |= PHY_CT_PDOWN; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); - break; - - /* energy detect and energy detect plus mode */ - case PHY_PM_ENERGY_DETECT: - case PHY_PM_ENERGY_DETECT_PLUS: - /* - * - disable MAC 125 MHz clock - */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); - Word |= PHY_M_PC_DIS_125CLK; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); - - /* activate energy detect mode 1 */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); - - /* energy detect mode */ - if (Mode == PHY_PM_ENERGY_DETECT) { - Word |= PHY_M_PC_EN_DET; - } - /* energy detect plus mode */ - else { - Word |= PHY_M_PC_EN_DET_PLUS; - } - - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); - - /* - * reinitialize the PHY to force a software reset - * which is necessary after the register settings - * for the energy detect modes. - * Furthermore reinitialisation prevents that the - * PHY is running out of a stable state. - */ - SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE); - break; - - /* don't change current power mode */ - default: - pAC->GIni.GP[Port].PPhyPowerState = LastMode; - Ret = 1; - break; - } - } - /* low power modes are not supported by this chip */ - else { - Ret = 1; - } - - return(Ret); - -} /* SkGmEnterLowPowerMode */ - -/****************************************************************************** - * - * SkGmLeaveLowPowerMode() - * - * Description: - * Leave the current low power mode and switch to normal mode - * - * Note: - * - * Returns: - * 0: ok - * 1: error - */ -int SkGmLeaveLowPowerMode( -SK_AC *pAC, /* adapter context */ -SK_IOC IoC, /* IO context */ -int Port) /* Port Index (e.g. MAC_1) */ -{ - SK_U32 DWord; - SK_U16 Word; - SK_U8 LastMode; - int Ret = 0; - - if (pAC->GIni.GIYukonLite && - pAC->GIni.GIChipRev >= CHIP_REV_YU_LITE_A3) { - - /* save current power mode */ - LastMode = pAC->GIni.GP[Port].PPhyPowerState; - pAC->GIni.GP[Port].PPhyPowerState = PHY_PM_OPERATIONAL_MODE; - - switch (LastMode) { - /* coma mode (deep sleep) */ - case PHY_PM_DEEP_SLEEP: - SK_IN32(IoC, PCI_C(PCI_OUR_REG_1), &DWord); - - SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_ON); - - /* Release PHY from Coma Mode */ - SK_OUT32(IoC, PCI_C(PCI_OUR_REG_1), DWord & ~PCI_PHY_COMA); - - SK_OUT8(IoC, B2_TST_CTRL1, TST_CFG_WRITE_OFF); - - SK_IN32(IoC, B2_GP_IO, &DWord); - - /* set to output */ - DWord |= (GP_DIR_9 | GP_IO_9); - - /* set PHY reset */ - SK_OUT32(IoC, B2_GP_IO, DWord); - - DWord &= ~GP_IO_9; /* clear PHY reset (active high) */ - - /* clear PHY reset */ - SK_OUT32(IoC, B2_GP_IO, DWord); - break; - - /* IEEE 22.2.4.1.5 compatible power down mode */ - case PHY_PM_IEEE_POWER_DOWN: - /* - * - enable MAC 125 MHz clock - * - set MAC power up - */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); - Word &= ~PHY_M_PC_DIS_125CLK; - Word |= PHY_M_PC_MAC_POW_UP; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); - - /* - * register changes must be followed by a software - * reset to take effect - */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); - Word |= PHY_CT_RESET; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); - - /* switch IEEE compatible power down mode off */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_CTRL, &Word); - Word &= ~PHY_CT_PDOWN; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_CTRL, Word); - break; - - /* energy detect and energy detect plus mode */ - case PHY_PM_ENERGY_DETECT: - case PHY_PM_ENERGY_DETECT_PLUS: - /* - * - enable MAC 125 MHz clock - */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); - Word &= ~PHY_M_PC_DIS_125CLK; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); - - /* disable energy detect mode */ - SkGmPhyRead(pAC, IoC, Port, PHY_MARV_PHY_CTRL, &Word); - Word &= ~PHY_M_PC_EN_DET_MSK; - SkGmPhyWrite(pAC, IoC, Port, PHY_MARV_PHY_CTRL, Word); - - /* - * reinitialize the PHY to force a software reset - * which is necessary after the register settings - * for the energy detect modes. - * Furthermore reinitialisation prevents that the - * PHY is running out of a stable state. - */ - SkGmInitPhyMarv(pAC, IoC, Port, SK_FALSE); - break; - - /* don't change current power mode */ - default: - pAC->GIni.GP[Port].PPhyPowerState = LastMode; - Ret = 1; - break; - } - } - /* low power modes are not supported by this chip */ - else { - Ret = 1; - } - - return(Ret); - -} /* SkGmLeaveLowPowerMode */ -#endif /* !SK_SLIM */ - - /****************************************************************************** * * SkGmInitPhyMarv() - Initialize the Marvell Phy registers @@ -3420,145 +3112,6 @@ int Port) /* Port Index (MAC_1 + n) */ } /* SkMacAutoNegDone */ -#ifdef GENESIS -/****************************************************************************** - * - * SkXmSetRxTxEn() - Special Set Rx/Tx Enable and some features in XMAC - * - * Description: - * sets MAC or PHY LoopBack and Duplex Mode in the MMU Command Reg. - * enables Rx/Tx - * - * Returns: N/A - */ -static void SkXmSetRxTxEn( -SK_AC *pAC, /* Adapter Context */ -SK_IOC IoC, /* IO context */ -int Port, /* Port Index (MAC_1 + n) */ -int Para) /* Parameter to set: MAC or PHY LoopBack, Duplex Mode */ -{ - SK_U16 Word; - - XM_IN16(IoC, Port, XM_MMU_CMD, &Word); - - switch (Para & (SK_MAC_LOOPB_ON | SK_MAC_LOOPB_OFF)) { - case SK_MAC_LOOPB_ON: - Word |= XM_MMU_MAC_LB; - break; - case SK_MAC_LOOPB_OFF: - Word &= ~XM_MMU_MAC_LB; - break; - } - - switch (Para & (SK_PHY_LOOPB_ON | SK_PHY_LOOPB_OFF)) { - case SK_PHY_LOOPB_ON: - Word |= XM_MMU_GMII_LOOP; - break; - case SK_PHY_LOOPB_OFF: - Word &= ~XM_MMU_GMII_LOOP; - break; - } - - switch (Para & (SK_PHY_FULLD_ON | SK_PHY_FULLD_OFF)) { - case SK_PHY_FULLD_ON: - Word |= XM_MMU_GMII_FD; - break; - case SK_PHY_FULLD_OFF: - Word &= ~XM_MMU_GMII_FD; - break; - } - - XM_OUT16(IoC, Port, XM_MMU_CMD, Word | XM_MMU_ENA_RX | XM_MMU_ENA_TX); - - /* dummy read to ensure writing */ - XM_IN16(IoC, Port, XM_MMU_CMD, &Word); - -} /* SkXmSetRxTxEn */ -#endif /* GENESIS */ - - -#ifdef YUKON -/****************************************************************************** - * - * SkGmSetRxTxEn() - Special Set Rx/Tx Enable and some features in GMAC - * - * Description: - * sets MAC LoopBack and Duplex Mode in the General Purpose Control Reg. - * enables Rx/Tx - * - * Returns: N/A - */ -static void SkGmSetRxTxEn( -SK_AC *pAC, /* Adapter Context */ -SK_IOC IoC, /* IO context */ -int Port, /* Port Index (MAC_1 + n) */ -int Para) /* Parameter to set: MAC LoopBack, Duplex Mode */ -{ - SK_U16 Ctrl; - - GM_IN16(IoC, Port, GM_GP_CTRL, &Ctrl); - - switch (Para & (SK_MAC_LOOPB_ON | SK_MAC_LOOPB_OFF)) { - case SK_MAC_LOOPB_ON: - Ctrl |= GM_GPCR_LOOP_ENA; - break; - case SK_MAC_LOOPB_OFF: - Ctrl &= ~GM_GPCR_LOOP_ENA; - break; - } - - switch (Para & (SK_PHY_FULLD_ON | SK_PHY_FULLD_OFF)) { - case SK_PHY_FULLD_ON: - Ctrl |= GM_GPCR_DUP_FULL; - break; - case SK_PHY_FULLD_OFF: - Ctrl &= ~GM_GPCR_DUP_FULL; - break; - } - - GM_OUT16(IoC, Port, GM_GP_CTRL, (SK_U16)(Ctrl | GM_GPCR_RX_ENA | - GM_GPCR_TX_ENA)); - - /* dummy read to ensure writing */ - GM_IN16(IoC, Port, GM_GP_CTRL, &Ctrl); - -} /* SkGmSetRxTxEn */ -#endif /* YUKON */ - - -#ifndef SK_SLIM -/****************************************************************************** - * - * SkMacSetRxTxEn() - Special Set Rx/Tx Enable and parameters - * - * Description: calls the Special Set Rx/Tx Enable routines dep. on board type - * - * Returns: N/A - */ -void SkMacSetRxTxEn( -SK_AC *pAC, /* Adapter Context */ -SK_IOC IoC, /* IO context */ -int Port, /* Port Index (MAC_1 + n) */ -int Para) -{ -#ifdef GENESIS - if (pAC->GIni.GIGenesis) { - - SkXmSetRxTxEn(pAC, IoC, Port, Para); - } -#endif /* GENESIS */ - -#ifdef YUKON - if (pAC->GIni.GIYukon) { - - SkGmSetRxTxEn(pAC, IoC, Port, Para); - } -#endif /* YUKON */ - -} /* SkMacSetRxTxEn */ -#endif /* !SK_SLIM */ - - /****************************************************************************** * * SkMacRxTxEnable() - Enable Rx/Tx activity if port is up @@ -3976,7 +3529,7 @@ SK_U16 PhyStat) /* PHY Status word to analyse */ * Returns: * nothing */ -void SkXmIrq( +static void SkXmIrq( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* IO context */ int Port) /* Port Index (MAC_1 + n) */ @@ -4112,7 +3665,7 @@ int Port) /* Port Index (MAC_1 + n) */ * Returns: * nothing */ -void SkGmIrq( +static void SkGmIrq( SK_AC *pAC, /* adapter context */ SK_IOC IoC, /* IO context */ int Port) /* Port Index (MAC_1 + n) */ diff --git a/drivers/net/skfp/fplustm.c b/drivers/net/skfp/fplustm.c index a2ed47f..a4b2b69 100644 --- a/drivers/net/skfp/fplustm.c +++ b/drivers/net/skfp/fplustm.c @@ -89,21 +89,21 @@ static const u_short my_sagp = 0xffff ; /* short group address (n.u.) */ /* * useful interrupt bits */ -static int mac_imsk1u = FM_STXABRS | FM_STXABRA0 | FM_SXMTABT ; -static int mac_imsk1l = FM_SQLCKS | FM_SQLCKA0 | FM_SPCEPDS | FM_SPCEPDA0| +static const int mac_imsk1u = FM_STXABRS | FM_STXABRA0 | FM_SXMTABT ; +static const int mac_imsk1l = FM_SQLCKS | FM_SQLCKA0 | FM_SPCEPDS | FM_SPCEPDA0| FM_STBURS | FM_STBURA0 ; /* delete FM_SRBFL after tests */ -static int mac_imsk2u = FM_SERRSF | FM_SNFSLD | FM_SRCVOVR | FM_SRBFL | +static const int mac_imsk2u = FM_SERRSF | FM_SNFSLD | FM_SRCVOVR | FM_SRBFL | FM_SMYCLM ; -static int mac_imsk2l = FM_STRTEXR | FM_SDUPCLM | FM_SFRMCTR | +static const int mac_imsk2l = FM_STRTEXR | FM_SDUPCLM | FM_SFRMCTR | FM_SERRCTR | FM_SLSTCTR | FM_STRTEXP | FM_SMULTDA | FM_SRNGOP ; -static int mac_imsk3u = FM_SRCVOVR2 | FM_SRBFL2 ; -static int mac_imsk3l = FM_SRPERRQ2 | FM_SRPERRQ1 ; +static const int mac_imsk3u = FM_SRCVOVR2 | FM_SRBFL2 ; +static const int mac_imsk3l = FM_SRPERRQ2 | FM_SRPERRQ1 ; -static int mac_beacon_imsk2u = FM_SOTRBEC | FM_SMYBEC | FM_SBEC | +static const int mac_beacon_imsk2u = FM_SOTRBEC | FM_SMYBEC | FM_SBEC | FM_SLOCLM | FM_SHICLM | FM_SMYCLM | FM_SCLM ; diff --git a/drivers/net/skfp/pcmplc.c b/drivers/net/skfp/pcmplc.c index cd0aa4c..74e129f 100644 --- a/drivers/net/skfp/pcmplc.c +++ b/drivers/net/skfp/pcmplc.c @@ -186,7 +186,7 @@ static const struct plt { * Do we need the EBUF error during signaling, too, to detect SUPERNET_3 * PLL bug? */ -static int plc_imsk_na = PL_PCM_CODE | PL_TRACE_PROP | PL_PCM_BREAK | +static const int plc_imsk_na = PL_PCM_CODE | PL_TRACE_PROP | PL_PCM_BREAK | PL_PCM_ENABLED | PL_SELF_TEST | PL_EBUF_ERR; #else /* SUPERNET_3 */ /* @@ -195,7 +195,7 @@ static int plc_imsk_na = PL_PCM_CODE | PL_TRACE_PROP | PL_PCM_BREAK | static int plc_imsk_na = PL_PCM_CODE | PL_TRACE_PROP | PL_PCM_BREAK | PL_PCM_ENABLED | PL_SELF_TEST ; #endif /* SUPERNET_3 */ -static int plc_imsk_act = PL_PCM_CODE | PL_TRACE_PROP | PL_PCM_BREAK | +static const int plc_imsk_act = PL_PCM_CODE | PL_TRACE_PROP | PL_PCM_BREAK | PL_PCM_ENABLED | PL_SELF_TEST | PL_EBUF_ERR; /* external functions */ diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 4b5ed2c..c7fb613 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -67,7 +67,7 @@ /* each new release!!! */ #define VERSION "2.07" -static const char *boot_msg = +static const char * const boot_msg = "SysKonnect FDDI PCI Adapter driver v" VERSION " for\n" " SK-55xx/SK-58xx adapters (SK-NET FDDI-FP/UP/LP)"; diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index d167ded..35b1805 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -201,7 +201,7 @@ static int max_interrupt_work = 20; static int mtu; /* Maximum number of multicast addresses to filter (vs. rx-all-multicast). The Starfire has a 512 element hash table based on the Ethernet CRC. */ -static int multicast_filter_limit = 512; +static const int multicast_filter_limit = 512; /* Whether to do TCP/UDP checksums in hardware */ static int enable_hw_cksum = 1; @@ -463,7 +463,7 @@ static struct pci_device_id starfire_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, starfire_pci_tbl); /* A chip capabilities table, matching the CH_xxx entries in xxx_pci_tbl[] above. */ -static struct chip_info { +static const struct chip_info { const char *name; int drv_flags; } netdrv_tbl[] __devinitdata = { @@ -2084,6 +2084,38 @@ static int netdev_close(struct net_device *dev) return 0; } +#ifdef CONFIG_PM +static int starfire_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (netif_running(dev)) { + netif_device_detach(dev); + netdev_close(dev); + } + + pci_save_state(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev,state)); + + return 0; +} + +static int starfire_resume(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + if (netif_running(dev)) { + netdev_open(dev); + netif_device_attach(dev); + } + + return 0; +} +#endif /* CONFIG_PM */ + static void __devexit starfire_remove_one (struct pci_dev *pdev) { @@ -2115,6 +2147,10 @@ static struct pci_driver starfire_driver = { .name = DRV_NAME, .probe = starfire_init_one, .remove = __devexit_p(starfire_remove_one), +#ifdef CONFIG_PM + .suspend = starfire_suspend, + .resume = starfire_resume, +#endif /* CONFIG_PM */ .id_table = starfire_pci_tbl, }; diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c index 0ab9c38..61eec46 100644 --- a/drivers/net/sundance.c +++ b/drivers/net/sundance.c @@ -106,7 +106,7 @@ static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ /* Maximum number of multicast addresses to filter (vs. rx-all-multicast). Typical is a 64 element hash table based on the Ethernet CRC. */ -static int multicast_filter_limit = 32; +static const int multicast_filter_limit = 32; /* Set the copy breakpoint for the copy-only-tiny-frames scheme. Setting to > 1518 effectively disables this feature. @@ -298,7 +298,7 @@ enum { struct pci_id_info { const char *name; }; -static struct pci_id_info pci_id_tbl[] = { +static const struct pci_id_info pci_id_tbl[] = { {"D-Link DFE-550TX FAST Ethernet Adapter"}, {"D-Link DFE-550FX 100Mbps Fiber-optics Adapter"}, {"D-Link DFE-580TX 4 port Server Adapter"}, @@ -633,9 +633,13 @@ static int __devinit sundance_probe1 (struct pci_dev *pdev, np->phys[0] = 1; /* Default setting */ np->mii_preamble_required++; + /* + * It seems some phys doesn't deal well with address 0 being accessed + * first, so leave address zero to the end of the loop (32 & 31). + */ for (phy = 1; phy <= 32 && phy_idx < MII_CNT; phy++) { - int mii_status = mdio_read(dev, phy, MII_BMSR); int phyx = phy & 0x1f; + int mii_status = mdio_read(dev, phyx, MII_BMSR); if (mii_status != 0xffff && mii_status != 0x0000) { np->phys[phy_idx++] = phyx; np->mii_if.advertising = mdio_read(dev, phyx, MII_ADVERTISE); diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index d3ddb41..cb0aba9 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -39,7 +39,7 @@ #include "sungem_phy.h" /* Link modes of the BCM5400 PHY */ -static int phy_BCM5400_link_table[8][3] = { +static const int phy_BCM5400_link_table[8][3] = { { 0, 0, 0 }, /* No link */ { 0, 0, 0 }, /* 10BT Half Duplex */ { 1, 0, 0 }, /* 10BT Full Duplex */ diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index caf4102..6c6c549 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -7802,7 +7802,7 @@ static int tg3_test_link(struct tg3 *tp) } /* Only test the commonly used registers */ -static int tg3_test_registers(struct tg3 *tp) +static const int tg3_test_registers(struct tg3 *tp) { int i, is_5705; u32 offset, read_mask, write_mask, val, save_val, read_val; @@ -8016,7 +8016,7 @@ out: static int tg3_do_mem_test(struct tg3 *tp, u32 offset, u32 len) { - static u32 test_pattern[] = { 0x00000000, 0xffffffff, 0xaa55a55a }; + static const u32 test_pattern[] = { 0x00000000, 0xffffffff, 0xaa55a55a }; int i; u32 j; @@ -9097,6 +9097,10 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tp->phy_id = PHY_ID_INVALID; tp->led_ctrl = LED_CTRL_MODE_PHY_1; + /* Do not even try poking around in here on Sun parts. */ + if (tp->tg3_flags2 & TG3_FLG2_SUN_570X) + return; + tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val); if (val == NIC_SRAM_DATA_SIG_MAGIC) { u32 nic_cfg, led_cfg; diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c index 97712c3..c58a4c3 100644 --- a/drivers/net/tokenring/lanstreamer.c +++ b/drivers/net/tokenring/lanstreamer.c @@ -122,6 +122,7 @@ #include <linux/spinlock.h> #include <linux/version.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <net/checksum.h> @@ -512,7 +513,7 @@ static int streamer_reset(struct net_device *dev) while (!((readw(streamer_mmio + SISR)) & SISR_SRB_REPLY)) { msleep_interruptible(100); - if (jiffies - t > 40 * HZ) { + if (time_after(jiffies, t + 40 * HZ)) { printk(KERN_ERR "IBM PCI tokenring card not responding\n"); release_region(dev->base_addr, STREAMER_IO_SPACE); diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c index 05477d2..23032a7 100644 --- a/drivers/net/tokenring/olympic.c +++ b/drivers/net/tokenring/olympic.c @@ -100,6 +100,7 @@ #include <linux/pci.h> #include <linux/spinlock.h> #include <linux/bitops.h> +#include <linux/jiffies.h> #include <net/checksum.h> @@ -307,7 +308,7 @@ static int __devinit olympic_init(struct net_device *dev) t=jiffies; while((readl(olympic_mmio+BCTL)) & BCTL_SOFTRESET) { schedule(); - if(jiffies-t > 40*HZ) { + if(time_after(jiffies, t + 40*HZ)) { printk(KERN_ERR "IBM PCI tokenring card not responding.\n"); return -ENODEV; } @@ -359,7 +360,7 @@ static int __devinit olympic_init(struct net_device *dev) t=jiffies; while (!readl(olympic_mmio+CLKCTL) & CLKCTL_PAUSE) { schedule() ; - if(jiffies-t > 2*HZ) { + if(time_after(jiffies, t + 2*HZ)) { printk(KERN_ERR "IBM Cardbus tokenring adapter not responsing.\n") ; return -ENODEV; } @@ -373,7 +374,7 @@ static int __devinit olympic_init(struct net_device *dev) t=jiffies; while(!((readl(olympic_mmio+SISR_RR)) & SISR_SRB_REPLY)) { schedule(); - if(jiffies-t > 15*HZ) { + if(time_after(jiffies, t + 15*HZ)) { printk(KERN_ERR "IBM PCI tokenring card not responding.\n"); return -ENODEV; } @@ -519,7 +520,7 @@ static int olympic_open(struct net_device *dev) olympic_priv->srb_queued=0; break; } - if ((jiffies-t) > 10*HZ) { + if (time_after(jiffies, t + 10*HZ)) { printk(KERN_WARNING "%s: SRB timed out. \n",dev->name) ; olympic_priv->srb_queued=0; break ; diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 2d0cfbc..6299e18 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -402,8 +402,7 @@ static void de_rx (struct de_private *de) unsigned copying_skb, buflen; skb = de->rx_skb[rx_tail].skb; - if (!skb) - BUG(); + BUG_ON(!skb); rmb(); status = le32_to_cpu(de->rx_ring[rx_tail].opts1); if (status & DescOwn) @@ -545,8 +544,7 @@ static void de_tx (struct de_private *de) break; skb = de->tx_skb[tx_tail].skb; - if (!skb) - BUG(); + BUG_ON(!skb); if (unlikely(skb == DE_DUMMY_SKB)) goto next; @@ -789,8 +787,7 @@ static void __de_set_rx_mode (struct net_device *dev) de->tx_head = NEXT_TX(entry); - if (TX_BUFFS_AVAIL(de) < 0) - BUG(); + BUG_ON(TX_BUFFS_AVAIL(de) < 0); if (TX_BUFFS_AVAIL(de) == 0) netif_stop_queue(dev); @@ -916,8 +913,7 @@ static void de_set_media (struct de_private *de) unsigned media = de->media_type; u32 macmode = dr32(MacMode); - if (de_is_running(de)) - BUG(); + BUG_ON(de_is_running(de)); if (de->de21040) dw32(CSR11, FULL_DUPLEX_MAGIC); @@ -1153,8 +1149,7 @@ static void de_media_interrupt (struct de_private *de, u32 status) return; } - if (!(status & LinkFail)) - BUG(); + BUG_ON(!(status & LinkFail)); if (netif_carrier_ok(de->dev)) { de_link_down(de); @@ -2092,8 +2087,7 @@ static void __exit de_remove_one (struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct de_private *de = dev->priv; - if (!dev) - BUG(); + BUG_ON(!dev); unregister_netdev(dev); kfree(de->ee_data); iounmap(de->regs); diff --git a/drivers/net/tulip/pnic.c b/drivers/net/tulip/pnic.c index d9980bd..ca7e532 100644 --- a/drivers/net/tulip/pnic.c +++ b/drivers/net/tulip/pnic.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/jiffies.h> #include "tulip.h" @@ -68,7 +69,7 @@ void pnic_lnk_change(struct net_device *dev, int csr5) */ if (tulip_media_cap[dev->if_port] & MediaIsMII) return; - if (! tp->nwayset || jiffies - dev->trans_start > 1*HZ) { + if (! tp->nwayset || time_after(jiffies, dev->trans_start + 1*HZ)) { tp->csr6 = 0x00420000 | (tp->csr6 & 0x0000fdff); iowrite32(tp->csr6, ioaddr + CSR6); iowrite32(0x30, ioaddr + CSR12); diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c index 5b1af39..ba05ded 100644 --- a/drivers/net/tulip/winbond-840.c +++ b/drivers/net/tulip/winbond-840.c @@ -1645,7 +1645,7 @@ static int w840_suspend (struct pci_dev *pdev, pm_message_t state) /* no more hardware accesses behind this line. */ - if (np->csr6) BUG(); + BUG_ON(np->csr6); if (ioread32(ioaddr + IntrEnable)) BUG(); /* pci_power_off(pdev, -1); */ diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c index 60d1e05..5634410 100644 --- a/drivers/net/tulip/xircom_cb.c +++ b/drivers/net/tulip/xircom_cb.c @@ -32,6 +32,9 @@ #include <asm/uaccess.h> #include <asm/io.h> +#ifdef CONFIG_NET_POLL_CONTROLLER +#include <asm/irq.h> +#endif #ifdef DEBUG #define enter(x) printk("Enter: %s, %s line %i\n",x,__FILE__,__LINE__) @@ -598,10 +601,8 @@ static void setup_descriptors(struct xircom_private *card) enter("setup_descriptors"); - if (card->rx_buffer == NULL) - BUG(); - if (card->tx_buffer == NULL) - BUG(); + BUG_ON(card->rx_buffer == NULL); + BUG_ON(card->tx_buffer == NULL); /* Receive descriptors */ memset(card->rx_buffer, 0, 128); /* clear the descriptors */ diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 4c76cb7..cde35dd 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -178,7 +178,7 @@ enum typhoon_cards { }; /* directly indexed by enum typhoon_cards, above */ -static struct typhoon_card_info typhoon_card_info[] __devinitdata = { +static const struct typhoon_card_info typhoon_card_info[] __devinitdata = { { "3Com Typhoon (3C990-TX)", TYPHOON_CRYPTO_NONE}, { "3Com Typhoon (3CR990-TX-95)", diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 18c27e1..883cf7d 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -459,7 +459,7 @@ config WANPIPE_FR bool "WANPIPE Frame Relay support" depends on VENDOR_SANGOMA help - Connect a WANPIPE card to a Frame Relay network, or use Frame Felay + Connect a WANPIPE card to a Frame Relay network, or use Frame Relay API to develop custom applications. Contains the Ethernet Bridging over Frame Relay feature, where diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 7db1d1d..cf5c805 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -29,6 +29,7 @@ #include <linux/ioport.h> #include <net/arp.h> +#include <asm/irq.h> #include <asm/io.h> #include <asm/dma.h> #include <asm/byteorder.h> diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 5380ddf..050e854 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -23,6 +23,7 @@ #include <linux/init.h> #include <net/arp.h> +#include <asm/irq.h> #include <asm/io.h> #include <asm/dma.h> #include <asm/byteorder.h> diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index ef85d76..5b0a19a 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -6,7 +6,8 @@ menu "Wireless LAN (non-hamradio)" depends on NETDEVICES config NET_RADIO - bool "Wireless LAN drivers (non-hamradio) & Wireless Extensions" + bool "Wireless LAN drivers (non-hamradio)" + select WIRELESS_EXT ---help--- Support for wireless LANs and everything having to do with radio, but not with amateur radio or FM broadcasting. @@ -135,8 +136,9 @@ comment "Wireless 802.11b ISA/PCI cards support" config IPW2100 tristate "Intel PRO/Wireless 2100 Network Connection" - depends on NET_RADIO && PCI && IEEE80211 + depends on NET_RADIO && PCI select FW_LOADER + select IEEE80211 ---help--- A driver for the Intel PRO/Wireless 2100 Network Connection 802.11b wireless network adapter. @@ -188,8 +190,9 @@ config IPW2100_DEBUG config IPW2200 tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection" - depends on NET_RADIO && IEEE80211 && PCI + depends on NET_RADIO && PCI select FW_LOADER + select IEEE80211 ---help--- A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network Connection adapters. @@ -201,7 +204,7 @@ config IPW2200 In order to use this driver, you will need a firmware image for it. You can obtain the firmware from <http://ipw2200.sf.net/>. See the above referenced README.ipw2200 - for information on where to install the firmare images. + for information on where to install the firmware images. You will also very likely need the Wireless Tools in order to configure your card: @@ -213,6 +216,19 @@ config IPW2200 say M here and read <file:Documentation/modules.txt>. The module will be called ipw2200.ko. +config IPW2200_MONITOR + bool "Enable promiscuous mode" + depends on IPW2200 + ---help--- + Enables promiscuous/monitor mode support for the ipw2200 driver. + With this feature compiled into the driver, you can switch to + promiscuous mode via the Wireless Tool's Monitor mode. While in this + mode, no packets can be sent. + +config IPW_QOS + bool "Enable QoS support" + depends on IPW2200 && EXPERIMENTAL + config IPW2200_DEBUG bool "Enable full debugging output in IPW2200 module." depends on IPW2200 @@ -239,13 +255,14 @@ config IPW2200_DEBUG config AIRO tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards" - depends on NET_RADIO && ISA_DMA_API && CRYPTO && (PCI || BROKEN) + depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN) + select CRYPTO ---help--- This is the standard Linux driver to support Cisco/Aironet ISA and PCI 802.11 wireless cards. It supports the new 802.11b cards from Cisco (Cisco 34X, Cisco 35X - with or without encryption) as well as card before the Cisco - aquisition (Aironet 4500, Aironet 4800, Aironet 4800B). + acquisition (Aironet 4500, Aironet 4800, Aironet 4800B). This driver support both the standard Linux Wireless Extensions and Cisco proprietary API, so both the Linux Wireless Tools and the @@ -387,13 +404,14 @@ config PCMCIA_SPECTRUM config AIRO_CS tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards" depends on NET_RADIO && PCMCIA && (BROKEN || !M32R) + select CRYPTO ---help--- This is the standard Linux driver to support Cisco/Aironet PCMCIA 802.11 wireless cards. This driver is the same as the Aironet driver part of the Linux Pcmcia package. It supports the new 802.11b cards from Cisco (Cisco 34X, Cisco 35X - with or without encryption) as well as card before the Cisco - aquisition (Aironet 4500, Aironet 4800, Aironet 4800B). It also + acquisition (Aironet 4500, Aironet 4800, Aironet 4800B). It also supports OEM of Cisco such as the DELL TrueMobile 4800 and Xircom 802.11b cards. diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index a4c7ae9..864937a 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -36,6 +36,7 @@ #include <linux/in.h> #include <linux/bitops.h> #include <linux/scatterlist.h> +#include <linux/crypto.h> #include <asm/io.h> #include <asm/system.h> @@ -87,14 +88,6 @@ static struct pci_driver airo_driver = { #include <linux/delay.h> #endif -/* Support Cisco MIC feature */ -#define MICSUPPORT - -#if defined(MICSUPPORT) && !defined(CONFIG_CRYPTO) -#warning MIC support requires Crypto API -#undef MICSUPPORT -#endif - /* Hack to do some power saving */ #define POWER_ON_DOWN @@ -1118,7 +1111,6 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp); static int writerids(struct net_device *dev, aironet_ioctl *comp); static int flashcard(struct net_device *dev, aironet_ioctl *comp); #endif /* CISCO_EXT */ -#ifdef MICSUPPORT static void micinit(struct airo_info *ai); static int micsetup(struct airo_info *ai); static int encapsulate(struct airo_info *ai, etherHead *pPacket, MICBuffer *buffer, int len); @@ -1127,9 +1119,6 @@ static int decapsulate(struct airo_info *ai, MICBuffer *mic, etherHead *pPacket, static u8 airo_rssi_to_dbm (tdsRssiEntry *rssi_rid, u8 rssi); static u8 airo_dbm_to_pct (tdsRssiEntry *rssi_rid, u8 dbm); -#include <linux/crypto.h> -#endif - struct airo_info { struct net_device_stats stats; struct net_device *dev; @@ -1190,12 +1179,10 @@ struct airo_info { unsigned long scan_timestamp; /* Time started to scan */ struct iw_spy_data spy_data; struct iw_public_data wireless_data; -#ifdef MICSUPPORT /* MIC stuff */ struct crypto_tfm *tfm; mic_module mod[2]; mic_statistics micstats; -#endif HostRxDesc rxfids[MPI_MAX_FIDS]; // rx/tx/config MPI350 descriptors HostTxDesc txfids[MPI_MAX_FIDS]; HostRidDesc config_desc; @@ -1229,7 +1216,6 @@ static int flashgchar(struct airo_info *ai,int matchbyte,int dwelltime); static int flashputbuf(struct airo_info *ai); static int flashrestart(struct airo_info *ai,struct net_device *dev); -#ifdef MICSUPPORT /*********************************************************************** * MIC ROUTINES * *********************************************************************** @@ -1686,7 +1672,6 @@ static void emmh32_final(emmh32_context *context, u8 digest[4]) digest[2] = (val>>8) & 0xFF; digest[3] = val & 0xFF; } -#endif static int readBSSListRid(struct airo_info *ai, int first, BSSListRid *list) { @@ -2005,7 +1990,6 @@ static int mpi_send_packet (struct net_device *dev) * Firmware automaticly puts 802 header on so * we don't need to account for it in the length */ -#ifdef MICSUPPORT if (test_bit(FLAG_MIC_CAPABLE, &ai->flags) && ai->micstats.enabled && (ntohs(((u16 *)buffer)[6]) != 0x888E)) { MICBuffer pMic; @@ -2022,9 +2006,7 @@ static int mpi_send_packet (struct net_device *dev) memcpy (sendbuf, &pMic, sizeof(pMic)); sendbuf += sizeof(pMic); memcpy (sendbuf, buffer, len - sizeof(etherHead)); - } else -#endif - { + } else { *payloadLen = cpu_to_le16(len - sizeof(etherHead)); dev->trans_start = jiffies; @@ -2400,9 +2382,7 @@ void stop_airo_card( struct net_device *dev, int freeres ) ai->shared, ai->shared_dma); } } -#ifdef MICSUPPORT crypto_free_tfm(ai->tfm); -#endif del_airo_dev( dev ); free_netdev( dev ); } @@ -2726,9 +2706,7 @@ static struct net_device *_init_airo_card( unsigned short irq, int port, ai->thr_pid = kernel_thread(airo_thread, dev, CLONE_FS | CLONE_FILES); if (ai->thr_pid < 0) goto err_out_free; -#ifdef MICSUPPORT ai->tfm = NULL; -#endif rc = add_airo_dev( dev ); if (rc) goto err_out_thr; @@ -2969,10 +2947,8 @@ static int airo_thread(void *data) { airo_read_wireless_stats(ai); else if (test_bit(JOB_PROMISC, &ai->flags)) airo_set_promisc(ai); -#ifdef MICSUPPORT else if (test_bit(JOB_MIC, &ai->flags)) micinit(ai); -#endif else if (test_bit(JOB_EVENT, &ai->flags)) airo_send_event(dev); else if (test_bit(JOB_AUTOWEP, &ai->flags)) @@ -3010,12 +2986,10 @@ static irqreturn_t airo_interrupt ( int irq, void* dev_id, struct pt_regs *regs) if ( status & EV_MIC ) { OUT4500( apriv, EVACK, EV_MIC ); -#ifdef MICSUPPORT if (test_bit(FLAG_MIC_CAPABLE, &apriv->flags)) { set_bit(JOB_MIC, &apriv->flags); wake_up_interruptible(&apriv->thr_wait); } -#endif } if ( status & EV_LINK ) { union iwreq_data wrqu; @@ -3194,11 +3168,8 @@ static irqreturn_t airo_interrupt ( int irq, void* dev_id, struct pt_regs *regs) } bap_read (apriv, buffer + hdrlen/2, len, BAP0); } else { -#ifdef MICSUPPORT MICBuffer micbuf; -#endif bap_read (apriv, buffer, ETH_ALEN*2, BAP0); -#ifdef MICSUPPORT if (apriv->micstats.enabled) { bap_read (apriv,(u16*)&micbuf,sizeof(micbuf),BAP0); if (ntohs(micbuf.typelen) > 0x05DC) @@ -3211,15 +3182,10 @@ static irqreturn_t airo_interrupt ( int irq, void* dev_id, struct pt_regs *regs) skb_trim (skb, len + hdrlen); } } -#endif bap_read(apriv,buffer+ETH_ALEN,len,BAP0); -#ifdef MICSUPPORT if (decapsulate(apriv,&micbuf,(etherHead*)buffer,len)) { badmic: dev_kfree_skb_irq (skb); -#else - if (0) { -#endif badrx: OUT4500( apriv, EVACK, EV_RX); goto exitrx; @@ -3430,10 +3396,8 @@ static void mpi_receive_802_3(struct airo_info *ai) int len = 0; struct sk_buff *skb; char *buffer; -#ifdef MICSUPPORT int off = 0; MICBuffer micbuf; -#endif memcpy_fromio(&rxd, ai->rxfids[0].card_ram_off, sizeof(rxd)); /* Make sure we got something */ @@ -3448,7 +3412,6 @@ static void mpi_receive_802_3(struct airo_info *ai) goto badrx; } buffer = skb_put(skb,len); -#ifdef MICSUPPORT memcpy(buffer, ai->rxfids[0].virtual_host_addr, ETH_ALEN * 2); if (ai->micstats.enabled) { memcpy(&micbuf, @@ -3470,9 +3433,6 @@ badmic: dev_kfree_skb_irq (skb); goto badrx; } -#else - memcpy(buffer, ai->rxfids[0].virtual_host_addr, len); -#endif #ifdef WIRELESS_SPY if (ai->spy_data.spy_number > 0) { char *sa; @@ -3689,13 +3649,11 @@ static u16 setup_card(struct airo_info *ai, u8 *mac, int lock) ai->config.authType = AUTH_OPEN; ai->config.modulation = MOD_CCK; -#ifdef MICSUPPORT if ((cap_rid.len>=sizeof(cap_rid)) && (cap_rid.extSoftCap&1) && (micsetup(ai) == SUCCESS)) { ai->config.opmode |= MODE_MIC; set_bit(FLAG_MIC_CAPABLE, &ai->flags); } -#endif /* Save off the MAC */ for( i = 0; i < ETH_ALEN; i++ ) { @@ -4170,15 +4128,12 @@ static int transmit_802_3_packet(struct airo_info *ai, int len, char *pPacket) } len -= ETH_ALEN * 2; -#ifdef MICSUPPORT if (test_bit(FLAG_MIC_CAPABLE, &ai->flags) && ai->micstats.enabled && (ntohs(((u16 *)pPacket)[6]) != 0x888E)) { if (encapsulate(ai,(etherHead *)pPacket,&pMic,len) != SUCCESS) return ERROR; miclen = sizeof(pMic); } -#endif - // packet is destination[6], source[6], payload[len-12] // write the payload length and dst/src/payload if (bap_setup(ai, txFid, 0x0036, BAP1) != SUCCESS) return ERROR; @@ -5081,7 +5036,6 @@ static int set_wep_key(struct airo_info *ai, u16 index, wkr.len = sizeof(wkr); wkr.kindex = 0xffff; wkr.mac[0] = (char)index; - if (perm) printk(KERN_INFO "Setting transmit key to %d\n", index); if (perm) ai->defindex = (char)index; } else { // We are actually setting the key @@ -5090,7 +5044,6 @@ static int set_wep_key(struct airo_info *ai, u16 index, wkr.klen = keylen; memcpy( wkr.key, key, keylen ); memcpy( wkr.mac, macaddr, ETH_ALEN ); - printk(KERN_INFO "Setting key %d\n", index); } if (perm) disable_MAC(ai, lock); @@ -5801,11 +5754,13 @@ static int airo_set_wap(struct net_device *dev, Cmd cmd; Resp rsp; APListRid APList_rid; - static const unsigned char bcast[ETH_ALEN] = { 255, 255, 255, 255, 255, 255 }; + static const u8 any[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + static const u8 off[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; if (awrq->sa_family != ARPHRD_ETHER) return -EINVAL; - else if (!memcmp(bcast, awrq->sa_data, ETH_ALEN)) { + else if (!memcmp(any, awrq->sa_data, ETH_ALEN) || + !memcmp(off, awrq->sa_data, ETH_ALEN)) { memset(&cmd, 0, sizeof(cmd)); cmd.cmd=CMD_LOSE_SYNC; if (down_interruptible(&local->sem)) @@ -6296,6 +6251,272 @@ static int airo_get_encode(struct net_device *dev, /*------------------------------------------------------------------*/ /* + * Wireless Handler : set extended Encryption parameters + */ +static int airo_set_encodeext(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct airo_info *local = dev->priv; + struct iw_point *encoding = &wrqu->encoding; + struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; + CapabilityRid cap_rid; /* Card capability info */ + int perm = ( encoding->flags & IW_ENCODE_TEMP ? 0 : 1 ); + u16 currentAuthType = local->config.authType; + int idx, key_len, alg = ext->alg, set_key = 1; + wep_key_t key; + + /* Is WEP supported ? */ + readCapabilityRid(local, &cap_rid, 1); + /* Older firmware doesn't support this... + if(!(cap_rid.softCap & 2)) { + return -EOPNOTSUPP; + } */ + readConfigRid(local, 1); + + /* Determine and validate the key index */ + idx = encoding->flags & IW_ENCODE_INDEX; + if (idx) { + if (idx < 1 || idx > ((cap_rid.softCap & 0x80) ? 4:1)) + return -EINVAL; + idx--; + } else + idx = get_wep_key(local, 0xffff); + + if (encoding->flags & IW_ENCODE_DISABLED) + alg = IW_ENCODE_ALG_NONE; + + if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) { + /* Only set transmit key index here, actual + * key is set below if needed. + */ + set_wep_key(local, idx, NULL, 0, perm, 1); + set_key = ext->key_len > 0 ? 1 : 0; + } + + if (set_key) { + /* Set the requested key first */ + memset(key.key, 0, MAX_KEY_SIZE); + switch (alg) { + case IW_ENCODE_ALG_NONE: + key.len = 0; + break; + case IW_ENCODE_ALG_WEP: + if (ext->key_len > MIN_KEY_SIZE) { + key.len = MAX_KEY_SIZE; + } else if (ext->key_len > 0) { + key.len = MIN_KEY_SIZE; + } else { + return -EINVAL; + } + key_len = min (ext->key_len, key.len); + memcpy(key.key, ext->key, key_len); + break; + default: + return -EINVAL; + } + /* Send the key to the card */ + set_wep_key(local, idx, key.key, key.len, perm, 1); + } + + /* Read the flags */ + if(encoding->flags & IW_ENCODE_DISABLED) + local->config.authType = AUTH_OPEN; // disable encryption + if(encoding->flags & IW_ENCODE_RESTRICTED) + local->config.authType = AUTH_SHAREDKEY; // Only Both + if(encoding->flags & IW_ENCODE_OPEN) + local->config.authType = AUTH_ENCRYPT; // Only Wep + /* Commit the changes to flags if needed */ + if (local->config.authType != currentAuthType) + set_bit (FLAG_COMMIT, &local->flags); + + return -EINPROGRESS; +} + + +/*------------------------------------------------------------------*/ +/* + * Wireless Handler : get extended Encryption parameters + */ +static int airo_get_encodeext(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct airo_info *local = dev->priv; + struct iw_point *encoding = &wrqu->encoding; + struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; + CapabilityRid cap_rid; /* Card capability info */ + int idx, max_key_len; + + /* Is it supported ? */ + readCapabilityRid(local, &cap_rid, 1); + if(!(cap_rid.softCap & 2)) { + return -EOPNOTSUPP; + } + readConfigRid(local, 1); + + max_key_len = encoding->length - sizeof(*ext); + if (max_key_len < 0) + return -EINVAL; + + idx = encoding->flags & IW_ENCODE_INDEX; + if (idx) { + if (idx < 1 || idx > ((cap_rid.softCap & 0x80) ? 4:1)) + return -EINVAL; + idx--; + } else + idx = get_wep_key(local, 0xffff); + + encoding->flags = idx + 1; + memset(ext, 0, sizeof(*ext)); + + /* Check encryption mode */ + switch(local->config.authType) { + case AUTH_ENCRYPT: + encoding->flags = IW_ENCODE_ALG_WEP | IW_ENCODE_ENABLED; + break; + case AUTH_SHAREDKEY: + encoding->flags = IW_ENCODE_ALG_WEP | IW_ENCODE_ENABLED; + break; + default: + case AUTH_OPEN: + encoding->flags = IW_ENCODE_ALG_NONE | IW_ENCODE_DISABLED; + break; + } + /* We can't return the key, so set the proper flag and return zero */ + encoding->flags |= IW_ENCODE_NOKEY; + memset(extra, 0, 16); + + /* Copy the key to the user buffer */ + ext->key_len = get_wep_key(local, idx); + if (ext->key_len > 16) { + ext->key_len=0; + } + + return 0; +} + + +/*------------------------------------------------------------------*/ +/* + * Wireless Handler : set extended authentication parameters + */ +static int airo_set_auth(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct airo_info *local = dev->priv; + struct iw_param *param = &wrqu->param; + u16 currentAuthType = local->config.authType; + + switch (param->flags & IW_AUTH_INDEX) { + case IW_AUTH_WPA_VERSION: + case IW_AUTH_CIPHER_PAIRWISE: + case IW_AUTH_CIPHER_GROUP: + case IW_AUTH_KEY_MGMT: + case IW_AUTH_RX_UNENCRYPTED_EAPOL: + case IW_AUTH_PRIVACY_INVOKED: + /* + * airo does not use these parameters + */ + break; + + case IW_AUTH_DROP_UNENCRYPTED: + if (param->value) { + /* Only change auth type if unencrypted */ + if (currentAuthType == AUTH_OPEN) + local->config.authType = AUTH_ENCRYPT; + } else { + local->config.authType = AUTH_OPEN; + } + + /* Commit the changes to flags if needed */ + if (local->config.authType != currentAuthType) + set_bit (FLAG_COMMIT, &local->flags); + break; + + case IW_AUTH_80211_AUTH_ALG: { + /* FIXME: What about AUTH_OPEN? This API seems to + * disallow setting our auth to AUTH_OPEN. + */ + if (param->value & IW_AUTH_ALG_SHARED_KEY) { + local->config.authType = AUTH_SHAREDKEY; + } else if (param->value & IW_AUTH_ALG_OPEN_SYSTEM) { + local->config.authType = AUTH_ENCRYPT; + } else + return -EINVAL; + break; + + /* Commit the changes to flags if needed */ + if (local->config.authType != currentAuthType) + set_bit (FLAG_COMMIT, &local->flags); + } + + case IW_AUTH_WPA_ENABLED: + /* Silently accept disable of WPA */ + if (param->value > 0) + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + return -EINPROGRESS; +} + + +/*------------------------------------------------------------------*/ +/* + * Wireless Handler : get extended authentication parameters + */ +static int airo_get_auth(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct airo_info *local = dev->priv; + struct iw_param *param = &wrqu->param; + u16 currentAuthType = local->config.authType; + + switch (param->flags & IW_AUTH_INDEX) { + case IW_AUTH_DROP_UNENCRYPTED: + switch (currentAuthType) { + case AUTH_SHAREDKEY: + case AUTH_ENCRYPT: + param->value = 1; + break; + default: + param->value = 0; + break; + } + break; + + case IW_AUTH_80211_AUTH_ALG: + switch (currentAuthType) { + case AUTH_SHAREDKEY: + param->value = IW_AUTH_ALG_SHARED_KEY; + break; + case AUTH_ENCRYPT: + default: + param->value = IW_AUTH_ALG_OPEN_SYSTEM; + break; + } + break; + + case IW_AUTH_WPA_ENABLED: + param->value = 0; + break; + + default: + return -EOPNOTSUPP; + } + return 0; +} + + +/*------------------------------------------------------------------*/ +/* * Wireless Handler : set Tx-Power */ static int airo_set_txpow(struct net_device *dev, @@ -7050,6 +7271,15 @@ static const iw_handler airo_handler[] = (iw_handler) airo_get_encode, /* SIOCGIWENCODE */ (iw_handler) airo_set_power, /* SIOCSIWPOWER */ (iw_handler) airo_get_power, /* SIOCGIWPOWER */ + (iw_handler) NULL, /* -- hole -- */ + (iw_handler) NULL, /* -- hole -- */ + (iw_handler) NULL, /* SIOCSIWGENIE */ + (iw_handler) NULL, /* SIOCGIWGENIE */ + (iw_handler) airo_set_auth, /* SIOCSIWAUTH */ + (iw_handler) airo_get_auth, /* SIOCGIWAUTH */ + (iw_handler) airo_set_encodeext, /* SIOCSIWENCODEEXT */ + (iw_handler) airo_get_encodeext, /* SIOCGIWENCODEEXT */ + (iw_handler) NULL, /* SIOCSIWPMKSA */ }; /* Note : don't describe AIROIDIFC and AIROOLDIDIFC in here. @@ -7270,13 +7500,11 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { case AIROGSTAT: ridcode = RID_STATUS; break; case AIROGSTATSD32: ridcode = RID_STATSDELTA; break; case AIROGSTATSC32: ridcode = RID_STATS; break; -#ifdef MICSUPPORT case AIROGMICSTATS: if (copy_to_user(comp->data, &ai->micstats, min((int)comp->len,(int)sizeof(ai->micstats)))) return -EFAULT; return 0; -#endif case AIRORRID: ridcode = comp->ridnum; break; default: return -EINVAL; @@ -7308,9 +7536,7 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { static int writerids(struct net_device *dev, aironet_ioctl *comp) { struct airo_info *ai = dev->priv; int ridcode; -#ifdef MICSUPPORT int enabled; -#endif Resp rsp; static int (* writer)(struct airo_info *, u16 rid, const void *, int, int); unsigned char *iobuf; @@ -7367,11 +7593,9 @@ static int writerids(struct net_device *dev, aironet_ioctl *comp) { PC4500_readrid(ai,RID_STATSDELTACLEAR,iobuf,RIDSIZE, 1); -#ifdef MICSUPPORT enabled = ai->micstats.enabled; memset(&ai->micstats,0,sizeof(ai->micstats)); ai->micstats.enabled = enabled; -#endif if (copy_to_user(comp->data, iobuf, min((int)comp->len, (int)RIDSIZE))) { diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c index dfc2401..87afa68 100644 --- a/drivers/net/wireless/atmel.c +++ b/drivers/net/wireless/atmel.c @@ -137,44 +137,6 @@ static struct { #define MAC_BOOT_COMPLETE 0x0010 // MAC boot has been completed #define MAC_INIT_OK 0x0002 // MAC boot has been completed -#define C80211_SUBTYPE_MGMT_ASS_REQUEST 0x00 -#define C80211_SUBTYPE_MGMT_ASS_RESPONSE 0x10 -#define C80211_SUBTYPE_MGMT_REASS_REQUEST 0x20 -#define C80211_SUBTYPE_MGMT_REASS_RESPONSE 0x30 -#define C80211_SUBTYPE_MGMT_ProbeRequest 0x40 -#define C80211_SUBTYPE_MGMT_ProbeResponse 0x50 -#define C80211_SUBTYPE_MGMT_BEACON 0x80 -#define C80211_SUBTYPE_MGMT_ATIM 0x90 -#define C80211_SUBTYPE_MGMT_DISASSOSIATION 0xA0 -#define C80211_SUBTYPE_MGMT_Authentication 0xB0 -#define C80211_SUBTYPE_MGMT_Deauthentication 0xC0 - -#define C80211_MGMT_AAN_OPENSYSTEM 0x0000 -#define C80211_MGMT_AAN_SHAREDKEY 0x0001 - -#define C80211_MGMT_CAPABILITY_ESS 0x0001 // see 802.11 p.58 -#define C80211_MGMT_CAPABILITY_IBSS 0x0002 // - " - -#define C80211_MGMT_CAPABILITY_CFPollable 0x0004 // - " - -#define C80211_MGMT_CAPABILITY_CFPollRequest 0x0008 // - " - -#define C80211_MGMT_CAPABILITY_Privacy 0x0010 // - " - - -#define C80211_MGMT_SC_Success 0 -#define C80211_MGMT_SC_Unspecified 1 -#define C80211_MGMT_SC_SupportCapabilities 10 -#define C80211_MGMT_SC_ReassDenied 11 -#define C80211_MGMT_SC_AssDenied 12 -#define C80211_MGMT_SC_AuthAlgNotSupported 13 -#define C80211_MGMT_SC_AuthTransSeqNumError 14 -#define C80211_MGMT_SC_AuthRejectChallenge 15 -#define C80211_MGMT_SC_AuthRejectTimeout 16 -#define C80211_MGMT_SC_AssDeniedHandleAP 17 -#define C80211_MGMT_SC_AssDeniedBSSRate 18 - -#define C80211_MGMT_ElementID_SSID 0 -#define C80211_MGMT_ElementID_SupportedRates 1 -#define C80211_MGMT_ElementID_ChallengeText 16 -#define C80211_MGMT_CAPABILITY_ShortPreamble 0x0020 - #define MIB_MAX_DATA_BYTES 212 #define MIB_HEADER_SIZE 4 /* first four fields */ @@ -2835,7 +2797,7 @@ static void handle_beacon_probe(struct atmel_private *priv, u16 capability, u8 channel) { int rejoin = 0; - int new = capability & C80211_MGMT_CAPABILITY_ShortPreamble ? + int new = capability & MFIE_TYPE_POWER_CONSTRAINT ? SHORT_PREAMBLE : LONG_PREAMBLE; if (priv->preamble != new) { @@ -2921,11 +2883,11 @@ static void send_association_request(struct atmel_private *priv, int is_reassoc) memcpy(header.addr2, priv->dev->dev_addr, 6); memcpy(header.addr3, priv->CurrentBSSID, 6); - body.capability = cpu_to_le16(C80211_MGMT_CAPABILITY_ESS); + body.capability = cpu_to_le16(WLAN_CAPABILITY_ESS); if (priv->wep_is_on) - body.capability |= cpu_to_le16(C80211_MGMT_CAPABILITY_Privacy); + body.capability |= cpu_to_le16(WLAN_CAPABILITY_PRIVACY); if (priv->preamble == SHORT_PREAMBLE) - body.capability |= cpu_to_le16(C80211_MGMT_CAPABILITY_ShortPreamble); + body.capability |= cpu_to_le16(MFIE_TYPE_POWER_CONSTRAINT); body.listen_interval = cpu_to_le16(priv->listen_interval * priv->beacon_period); @@ -2939,10 +2901,10 @@ static void send_association_request(struct atmel_private *priv, int is_reassoc) bodysize = 12 + priv->SSID_size; } - ssid_el_p[0] = C80211_MGMT_ElementID_SSID; + ssid_el_p[0] = MFIE_TYPE_SSID; ssid_el_p[1] = priv->SSID_size; memcpy(ssid_el_p + 2, priv->SSID, priv->SSID_size); - ssid_el_p[2 + priv->SSID_size] = C80211_MGMT_ElementID_SupportedRates; + ssid_el_p[2 + priv->SSID_size] = MFIE_TYPE_RATES; ssid_el_p[3 + priv->SSID_size] = 4; /* len of suported rates */ memcpy(ssid_el_p + 4 + priv->SSID_size, atmel_basic_rates, 4); @@ -3004,7 +2966,7 @@ static void store_bss_info(struct atmel_private *priv, u16 beacon_period, u8 channel, u8 rssi, u8 ssid_len, u8 *ssid, int is_beacon) { - u8 *bss = capability & C80211_MGMT_CAPABILITY_ESS ? header->addr2 : header->addr3; + u8 *bss = capability & WLAN_CAPABILITY_ESS ? header->addr2 : header->addr3; int i, index; for (index = -1, i = 0; i < priv->BSS_list_entries; i++) @@ -3030,16 +2992,16 @@ static void store_bss_info(struct atmel_private *priv, priv->BSSinfo[index].channel = channel; priv->BSSinfo[index].beacon_period = beacon_period; - priv->BSSinfo[index].UsingWEP = capability & C80211_MGMT_CAPABILITY_Privacy; + priv->BSSinfo[index].UsingWEP = capability & WLAN_CAPABILITY_PRIVACY; memcpy(priv->BSSinfo[index].SSID, ssid, ssid_len); priv->BSSinfo[index].SSIDsize = ssid_len; - if (capability & C80211_MGMT_CAPABILITY_IBSS) + if (capability & WLAN_CAPABILITY_IBSS) priv->BSSinfo[index].BSStype = IW_MODE_ADHOC; - else if (capability & C80211_MGMT_CAPABILITY_ESS) + else if (capability & WLAN_CAPABILITY_ESS) priv->BSSinfo[index].BSStype =IW_MODE_INFRA; - priv->BSSinfo[index].preamble = capability & C80211_MGMT_CAPABILITY_ShortPreamble ? + priv->BSSinfo[index].preamble = capability & MFIE_TYPE_POWER_CONSTRAINT ? SHORT_PREAMBLE : LONG_PREAMBLE; } @@ -3050,7 +3012,7 @@ static void authenticate(struct atmel_private *priv, u16 frame_len) u16 trans_seq_no = le16_to_cpu(auth->trans_seq); u16 system = le16_to_cpu(auth->alg); - if (status == C80211_MGMT_SC_Success && !priv->wep_is_on) { + if (status == WLAN_STATUS_SUCCESS && !priv->wep_is_on) { /* no WEP */ if (priv->station_was_associated) { atmel_enter_state(priv, STATION_STATE_REASSOCIATING); @@ -3063,19 +3025,19 @@ static void authenticate(struct atmel_private *priv, u16 frame_len) } } - if (status == C80211_MGMT_SC_Success && priv->wep_is_on) { + if (status == WLAN_STATUS_SUCCESS && priv->wep_is_on) { int should_associate = 0; /* WEP */ if (trans_seq_no != priv->ExpectedAuthentTransactionSeqNum) return; - if (system == C80211_MGMT_AAN_OPENSYSTEM) { + if (system == WLAN_AUTH_OPEN) { if (trans_seq_no == 0x0002) { should_associate = 1; } - } else if (system == C80211_MGMT_AAN_SHAREDKEY) { + } else if (system == WLAN_AUTH_SHARED_KEY) { if (trans_seq_no == 0x0002 && - auth->el_id == C80211_MGMT_ElementID_ChallengeText) { + auth->el_id == MFIE_TYPE_CHALLENGE) { send_authentication_request(priv, system, auth->chall_text, auth->chall_text_len); return; } else if (trans_seq_no == 0x0004) { @@ -3140,8 +3102,8 @@ static void associate(struct atmel_private *priv, u16 frame_len, u16 subtype) if (frame_len < 8 + rates_len) return; - if (status == C80211_MGMT_SC_Success) { - if (subtype == C80211_SUBTYPE_MGMT_ASS_RESPONSE) + if (status == WLAN_STATUS_SUCCESS) { + if (subtype == IEEE80211_STYPE_ASSOC_RESP) priv->AssociationRequestRetryCnt = 0; else priv->ReAssociationRequestRetryCnt = 0; @@ -3178,9 +3140,9 @@ static void associate(struct atmel_private *priv, u16 frame_len, u16 subtype) return; } - if (subtype == C80211_SUBTYPE_MGMT_ASS_RESPONSE && - status != C80211_MGMT_SC_AssDeniedBSSRate && - status != C80211_MGMT_SC_SupportCapabilities && + if (subtype == IEEE80211_STYPE_ASSOC_RESP && + status != WLAN_STATUS_ASSOC_DENIED_RATES && + status != WLAN_STATUS_CAPS_UNSUPPORTED && priv->AssociationRequestRetryCnt < MAX_ASSOCIATION_RETRIES) { mod_timer(&priv->management_timer, jiffies + MGMT_JIFFIES); priv->AssociationRequestRetryCnt++; @@ -3188,9 +3150,9 @@ static void associate(struct atmel_private *priv, u16 frame_len, u16 subtype) return; } - if (subtype == C80211_SUBTYPE_MGMT_REASS_RESPONSE && - status != C80211_MGMT_SC_AssDeniedBSSRate && - status != C80211_MGMT_SC_SupportCapabilities && + if (subtype == IEEE80211_STYPE_REASSOC_RESP && + status != WLAN_STATUS_ASSOC_DENIED_RATES && + status != WLAN_STATUS_CAPS_UNSUPPORTED && priv->AssociationRequestRetryCnt < MAX_ASSOCIATION_RETRIES) { mod_timer(&priv->management_timer, jiffies + MGMT_JIFFIES); priv->ReAssociationRequestRetryCnt++; @@ -3325,8 +3287,8 @@ static void atmel_management_frame(struct atmel_private *priv, subtype = le16_to_cpu(header->frame_ctl) & IEEE80211_FCTL_STYPE; switch (subtype) { - case C80211_SUBTYPE_MGMT_BEACON: - case C80211_SUBTYPE_MGMT_ProbeResponse: + case IEEE80211_STYPE_BEACON: + case IEEE80211_STYPE_PROBE_RESP: /* beacon frame has multiple variable-length fields - never let an engineer loose with a data structure design. */ @@ -3384,19 +3346,19 @@ static void atmel_management_frame(struct atmel_private *priv, beacon_interval, channel, rssi, ssid_length, &beacon->rates_el_id, - subtype == C80211_SUBTYPE_MGMT_BEACON); + subtype == IEEE80211_STYPE_BEACON); } break; - case C80211_SUBTYPE_MGMT_Authentication: + case IEEE80211_STYPE_AUTH: if (priv->station_state == STATION_STATE_AUTHENTICATING) authenticate(priv, frame_len); break; - case C80211_SUBTYPE_MGMT_ASS_RESPONSE: - case C80211_SUBTYPE_MGMT_REASS_RESPONSE: + case IEEE80211_STYPE_ASSOC_RESP: + case IEEE80211_STYPE_REASSOC_RESP: if (priv->station_state == STATION_STATE_ASSOCIATING || priv->station_state == STATION_STATE_REASSOCIATING) @@ -3404,7 +3366,7 @@ static void atmel_management_frame(struct atmel_private *priv, break; - case C80211_SUBTYPE_MGMT_DISASSOSIATION: + case IEEE80211_STYPE_DISASSOC: if (priv->station_is_associated && priv->operating_mode == IW_MODE_INFRA && is_frame_from_current_bss(priv, header)) { @@ -3417,7 +3379,7 @@ static void atmel_management_frame(struct atmel_private *priv, break; - case C80211_SUBTYPE_MGMT_Deauthentication: + case IEEE80211_STYPE_DEAUTH: if (priv->operating_mode == IW_MODE_INFRA && is_frame_from_current_bss(priv, header)) { priv->station_was_associated = 0; @@ -3453,12 +3415,12 @@ static void atmel_management_timer(u_long a) priv->AuthenticationRequestRetryCnt = 0; restart_search(priv); } else { - int auth = C80211_MGMT_AAN_OPENSYSTEM; + int auth = WLAN_AUTH_OPEN; priv->AuthenticationRequestRetryCnt++; priv->CurrentAuthentTransactionSeqNum = 0x0001; mod_timer(&priv->management_timer, jiffies + MGMT_JIFFIES); if (priv->wep_is_on && priv->exclude_unencrypted) - auth = C80211_MGMT_AAN_SHAREDKEY; + auth = WLAN_AUTH_SHARED_KEY; send_authentication_request(priv, auth, NULL, 0); } break; @@ -3558,14 +3520,14 @@ static void atmel_command_irq(struct atmel_private *priv) priv->station_was_associated = priv->station_is_associated; atmel_enter_state(priv, STATION_STATE_READY); } else { - int auth = C80211_MGMT_AAN_OPENSYSTEM; + int auth = WLAN_AUTH_OPEN; priv->AuthenticationRequestRetryCnt = 0; atmel_enter_state(priv, STATION_STATE_AUTHENTICATING); mod_timer(&priv->management_timer, jiffies + MGMT_JIFFIES); priv->CurrentAuthentTransactionSeqNum = 0x0001; if (priv->wep_is_on && priv->exclude_unencrypted) - auth = C80211_MGMT_AAN_SHAREDKEY; + auth = WLAN_AUTH_SHARED_KEY; send_authentication_request(priv, auth, NULL, 0); } return; diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 6290c9f..72335c8 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -167,12 +167,12 @@ that only one external action is invoked at a time. #include "ipw2100.h" -#define IPW2100_VERSION "1.1.3" +#define IPW2100_VERSION "git-1.2.2" #define DRV_NAME "ipw2100" #define DRV_VERSION IPW2100_VERSION #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2100 Network Driver" -#define DRV_COPYRIGHT "Copyright(c) 2003-2005 Intel Corporation" +#define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" /* Debugging stuff */ #ifdef CONFIG_IPW2100_DEBUG @@ -1418,7 +1418,7 @@ static int ipw2100_enable_adapter(struct ipw2100_priv *priv) if (priv->status & STATUS_ENABLED) return 0; - down(&priv->adapter_sem); + mutex_lock(&priv->adapter_mutex); if (rf_kill_active(priv)) { IPW_DEBUG_HC("Command aborted due to RF kill active.\n"); @@ -1444,7 +1444,7 @@ static int ipw2100_enable_adapter(struct ipw2100_priv *priv) } fail_up: - up(&priv->adapter_sem); + mutex_unlock(&priv->adapter_mutex); return err; } @@ -1576,7 +1576,7 @@ static int ipw2100_disable_adapter(struct ipw2100_priv *priv) cancel_delayed_work(&priv->hang_check); } - down(&priv->adapter_sem); + mutex_lock(&priv->adapter_mutex); err = ipw2100_hw_send_command(priv, &cmd); if (err) { @@ -1595,7 +1595,7 @@ static int ipw2100_disable_adapter(struct ipw2100_priv *priv) IPW_DEBUG_INFO("TODO: implement scan state machine\n"); fail_up: - up(&priv->adapter_sem); + mutex_unlock(&priv->adapter_mutex); return err; } @@ -1672,6 +1672,18 @@ static int ipw2100_start_scan(struct ipw2100_priv *priv) return err; } +static const struct ieee80211_geo ipw_geos[] = { + { /* Restricted */ + "---", + .bg_channels = 14, + .bg = {{2412, 1}, {2417, 2}, {2422, 3}, + {2427, 4}, {2432, 5}, {2437, 6}, + {2442, 7}, {2447, 8}, {2452, 9}, + {2457, 10}, {2462, 11}, {2467, 12}, + {2472, 13}, {2484, 14}}, + }, +}; + static int ipw2100_up(struct ipw2100_priv *priv, int deferred) { unsigned long flags; @@ -1727,6 +1739,13 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) goto exit; } + /* Initialize the geo */ + if (ieee80211_set_geo(priv->ieee, &ipw_geos[0])) { + printk(KERN_WARNING DRV_NAME "Could not set geo\n"); + return 0; + } + priv->ieee->freq_band = IEEE80211_24GHZ_BAND; + lock = LOCK_NONE; if (ipw2100_set_ordinal(priv, IPW_ORD_PERS_DB_LOCK, &lock, &ord_len)) { printk(KERN_ERR DRV_NAME @@ -1869,7 +1888,7 @@ static void ipw2100_reset_adapter(struct ipw2100_priv *priv) priv->status |= STATUS_RESET_PENDING; spin_unlock_irqrestore(&priv->low_lock, flags); - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); /* stop timed checks so that they don't interfere with reset */ priv->stop_hang_check = 1; cancel_delayed_work(&priv->hang_check); @@ -1879,7 +1898,7 @@ static void ipw2100_reset_adapter(struct ipw2100_priv *priv) wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL); ipw2100_up(priv, 0); - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); } @@ -2371,15 +2390,6 @@ static void isr_rx(struct ipw2100_priv *priv, int i, IPW_DEBUG_DROP("Dropping packet while interface is not up.\n"); return; } -#ifdef CONFIG_IPW2100_MONITOR - if (unlikely(priv->ieee->iw_mode == IW_MODE_MONITOR && - priv->config & CFG_CRC_CHECK && - status->flags & IPW_STATUS_FLAG_CRC_ERROR)) { - IPW_DEBUG_RX("CRC error in packet. Dropping.\n"); - priv->ieee->stats.rx_errors++; - return; - } -#endif if (unlikely(priv->ieee->iw_mode != IW_MODE_MONITOR && !(priv->status & STATUS_ASSOCIATED))) { @@ -2427,6 +2437,89 @@ static void isr_rx(struct ipw2100_priv *priv, int i, priv->rx_queue.drv[i].host_addr = packet->dma_addr; } +#ifdef CONFIG_IPW2100_MONITOR + +static void isr_rx_monitor(struct ipw2100_priv *priv, int i, + struct ieee80211_rx_stats *stats) +{ + struct ipw2100_status *status = &priv->status_queue.drv[i]; + struct ipw2100_rx_packet *packet = &priv->rx_buffers[i]; + + /* Magic struct that slots into the radiotap header -- no reason + * to build this manually element by element, we can write it much + * more efficiently than we can parse it. ORDER MATTERS HERE */ + struct ipw_rt_hdr { + struct ieee80211_radiotap_header rt_hdr; + s8 rt_dbmsignal; /* signal in dbM, kluged to signed */ + } *ipw_rt; + + IPW_DEBUG_RX("Handler...\n"); + + if (unlikely(status->frame_size > skb_tailroom(packet->skb) - + sizeof(struct ipw_rt_hdr))) { + IPW_DEBUG_INFO("%s: frame_size (%u) > skb_tailroom (%u)!" + " Dropping.\n", + priv->net_dev->name, + status->frame_size, + skb_tailroom(packet->skb)); + priv->ieee->stats.rx_errors++; + return; + } + + if (unlikely(!netif_running(priv->net_dev))) { + priv->ieee->stats.rx_errors++; + priv->wstats.discard.misc++; + IPW_DEBUG_DROP("Dropping packet while interface is not up.\n"); + return; + } + + if (unlikely(priv->config & CFG_CRC_CHECK && + status->flags & IPW_STATUS_FLAG_CRC_ERROR)) { + IPW_DEBUG_RX("CRC error in packet. Dropping.\n"); + priv->ieee->stats.rx_errors++; + return; + } + + pci_unmap_single(priv->pci_dev, packet->dma_addr, + sizeof(struct ipw2100_rx), PCI_DMA_FROMDEVICE); + memmove(packet->skb->data + sizeof(struct ipw_rt_hdr), + packet->skb->data, status->frame_size); + + ipw_rt = (struct ipw_rt_hdr *) packet->skb->data; + + ipw_rt->rt_hdr.it_version = PKTHDR_RADIOTAP_VERSION; + ipw_rt->rt_hdr.it_pad = 0; /* always good to zero */ + ipw_rt->rt_hdr.it_len = sizeof(struct ipw_rt_hdr); /* total hdr+data */ + + ipw_rt->rt_hdr.it_present = 1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL; + + ipw_rt->rt_dbmsignal = status->rssi + IPW2100_RSSI_TO_DBM; + + skb_put(packet->skb, status->frame_size + sizeof(struct ipw_rt_hdr)); + + if (!ieee80211_rx(priv->ieee, packet->skb, stats)) { + priv->ieee->stats.rx_errors++; + + /* ieee80211_rx failed, so it didn't free the SKB */ + dev_kfree_skb_any(packet->skb); + packet->skb = NULL; + } + + /* We need to allocate a new SKB and attach it to the RDB. */ + if (unlikely(ipw2100_alloc_skb(priv, packet))) { + IPW_DEBUG_WARNING( + "%s: Unable to allocate SKB onto RBD ring - disabling " + "adapter.\n", priv->net_dev->name); + /* TODO: schedule adapter shutdown */ + IPW_DEBUG_INFO("TODO: Shutdown adapter...\n"); + } + + /* Update the RDB entry */ + priv->rx_queue.drv[i].host_addr = packet->dma_addr; +} + +#endif + static int ipw2100_corruption_check(struct ipw2100_priv *priv, int i) { struct ipw2100_status *status = &priv->status_queue.drv[i]; @@ -2558,7 +2651,7 @@ static void __ipw2100_rx_process(struct ipw2100_priv *priv) case P8023_DATA_VAL: #ifdef CONFIG_IPW2100_MONITOR if (priv->ieee->iw_mode == IW_MODE_MONITOR) { - isr_rx(priv, i, &stats); + isr_rx_monitor(priv, i, &stats); break; } #endif @@ -3750,7 +3843,7 @@ static ssize_t store_memory(struct device *d, struct device_attribute *attr, struct net_device *dev = priv->net_dev; const char *p = buf; - (void) dev; /* kill unused-var warning for debug-only code */ + (void)dev; /* kill unused-var warning for debug-only code */ if (count < 1) return count; @@ -3863,7 +3956,7 @@ static int ipw2100_switch_mode(struct ipw2100_priv *priv, u32 mode) #ifdef CONFIG_IPW2100_MONITOR case IW_MODE_MONITOR: priv->last_mode = priv->ieee->iw_mode; - priv->net_dev->type = ARPHRD_IEEE80211; + priv->net_dev->type = ARPHRD_IEEE80211_RADIOTAP; break; #endif /* CONFIG_IPW2100_MONITOR */ } @@ -4070,7 +4163,7 @@ static ssize_t store_scan_age(struct device *d, struct device_attribute *attr, unsigned long val; char *p = buffer; - (void) dev; /* kill unused-var warning for debug-only code */ + (void)dev; /* kill unused-var warning for debug-only code */ IPW_DEBUG_INFO("enter\n"); @@ -4119,7 +4212,7 @@ static int ipw_radio_kill_sw(struct ipw2100_priv *priv, int disable_radio) IPW_DEBUG_RF_KILL("Manual SW RF Kill set to: RADIO %s\n", disable_radio ? "OFF" : "ON"); - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (disable_radio) { priv->status |= STATUS_RF_KILL_SW; @@ -4137,7 +4230,7 @@ static int ipw_radio_kill_sw(struct ipw2100_priv *priv, int disable_radio) schedule_reset(priv); } - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return 1; } @@ -5107,12 +5200,13 @@ static int ipw2100_set_tx_power(struct ipw2100_priv *priv, u32 tx_power) .host_command_length = 4 }; int err = 0; + u32 tmp = tx_power; if (tx_power != IPW_TX_POWER_DEFAULT) - tx_power = (tx_power - IPW_TX_POWER_MIN_DBM) * 16 / - (IPW_TX_POWER_MAX_DBM - IPW_TX_POWER_MIN_DBM); + tmp = (tx_power - IPW_TX_POWER_MIN_DBM) * 16 / + (IPW_TX_POWER_MAX_DBM - IPW_TX_POWER_MIN_DBM); - cmd.host_command_parameters[0] = tx_power; + cmd.host_command_parameters[0] = tmp; if (priv->ieee->iw_mode == IW_MODE_ADHOC) err = ipw2100_hw_send_command(priv, &cmd); @@ -5365,9 +5459,12 @@ static int ipw2100_configure_security(struct ipw2100_priv *priv, int batch_mode) SEC_LEVEL_0, 0, 1); } else { auth_mode = IPW_AUTH_OPEN; - if ((priv->ieee->sec.flags & SEC_AUTH_MODE) && - (priv->ieee->sec.auth_mode == WLAN_AUTH_SHARED_KEY)) - auth_mode = IPW_AUTH_SHARED; + if (priv->ieee->sec.flags & SEC_AUTH_MODE) { + if (priv->ieee->sec.auth_mode == WLAN_AUTH_SHARED_KEY) + auth_mode = IPW_AUTH_SHARED; + else if (priv->ieee->sec.auth_mode == WLAN_AUTH_LEAP) + auth_mode = IPW_AUTH_LEAP_CISCO_ID; + } sec_level = SEC_LEVEL_0; if (priv->ieee->sec.flags & SEC_LEVEL) @@ -5437,7 +5534,7 @@ static void shim__set_security(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); int i, force_update = 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) goto done; @@ -5510,7 +5607,7 @@ static void shim__set_security(struct net_device *dev, if (!(priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING))) ipw2100_configure_security(priv, 0); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); } static int ipw2100_adapter_setup(struct ipw2100_priv *priv) @@ -5634,7 +5731,7 @@ static int ipw2100_set_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); priv->config |= CFG_CUSTOM_MAC; memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN); @@ -5644,12 +5741,12 @@ static int ipw2100_set_address(struct net_device *dev, void *p) goto done; priv->reset_backoff = 0; - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); ipw2100_reset_adapter(priv); return 0; done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -5760,6 +5857,9 @@ static int ipw2100_wpa_set_auth_algs(struct ipw2100_priv *priv, int value) } else if (value & IW_AUTH_ALG_OPEN_SYSTEM) { sec.auth_mode = WLAN_AUTH_OPEN; ieee->open_wep = 1; + } else if (value & IW_AUTH_ALG_LEAP) { + sec.auth_mode = WLAN_AUTH_LEAP; + ieee->open_wep = 1; } else return -EINVAL; @@ -5771,8 +5871,8 @@ static int ipw2100_wpa_set_auth_algs(struct ipw2100_priv *priv, int value) return ret; } -void ipw2100_wpa_assoc_frame(struct ipw2100_priv *priv, - char *wpa_ie, int wpa_ie_len) +static void ipw2100_wpa_assoc_frame(struct ipw2100_priv *priv, + char *wpa_ie, int wpa_ie_len) { struct ipw2100_wpa_assoc_frame frame; @@ -5989,8 +6089,8 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, strcpy(priv->nick, "ipw2100"); spin_lock_init(&priv->low_lock); - sema_init(&priv->action_sem, 1); - sema_init(&priv->adapter_sem, 1); + mutex_init(&priv->action_mutex); + mutex_init(&priv->adapter_mutex); init_waitqueue_head(&priv->wait_command_queue); @@ -6155,7 +6255,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, * member to call a function that then just turns and calls ipw2100_up. * net_dev->init is called after name allocation but before the * notifier chain is called */ - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); err = register_netdev(dev); if (err) { printk(KERN_WARNING DRV_NAME @@ -6191,12 +6291,12 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, priv->status |= STATUS_INITIALIZED; - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return 0; fail_unlock: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); fail: if (dev) { @@ -6236,7 +6336,7 @@ static void __devexit ipw2100_pci_remove_one(struct pci_dev *pci_dev) struct net_device *dev; if (priv) { - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); priv->status &= ~STATUS_INITIALIZED; @@ -6251,9 +6351,9 @@ static void __devexit ipw2100_pci_remove_one(struct pci_dev *pci_dev) /* Take down the hardware */ ipw2100_down(priv); - /* Release the semaphore so that the network subsystem can + /* Release the mutex so that the network subsystem can * complete any needed calls into the driver... */ - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); /* Unregister the device first - this results in close() * being called if the device is open. If we free storage @@ -6292,7 +6392,7 @@ static int ipw2100_suspend(struct pci_dev *pci_dev, pm_message_t state) IPW_DEBUG_INFO("%s: Going into suspend...\n", dev->name); - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (priv->status & STATUS_INITIALIZED) { /* Take down the device; powers it off, etc. */ ipw2100_down(priv); @@ -6305,7 +6405,7 @@ static int ipw2100_suspend(struct pci_dev *pci_dev, pm_message_t state) pci_disable_device(pci_dev); pci_set_power_state(pci_dev, PCI_D3hot); - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return 0; } @@ -6319,7 +6419,7 @@ static int ipw2100_resume(struct pci_dev *pci_dev) if (IPW2100_PM_DISABLED) return 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); IPW_DEBUG_INFO("%s: Coming out of suspend...\n", dev->name); @@ -6345,7 +6445,7 @@ static int ipw2100_resume(struct pci_dev *pci_dev) if (!(priv->status & STATUS_RF_KILL_SW)) ipw2100_up(priv, 0); - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return 0; } @@ -6509,7 +6609,7 @@ static int ipw2100_wx_set_freq(struct net_device *dev, if (priv->ieee->iw_mode == IW_MODE_INFRA) return -EOPNOTSUPP; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -6540,7 +6640,7 @@ static int ipw2100_wx_set_freq(struct net_device *dev, } done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -6581,7 +6681,7 @@ static int ipw2100_wx_set_mode(struct net_device *dev, if (wrqu->mode == priv->ieee->iw_mode) return 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -6604,7 +6704,7 @@ static int ipw2100_wx_set_mode(struct net_device *dev, } done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -6786,7 +6886,7 @@ static int ipw2100_wx_set_wap(struct net_device *dev, if (wrqu->ap_addr.sa_family != ARPHRD_ETHER) return -EINVAL; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -6815,7 +6915,7 @@ static int ipw2100_wx_set_wap(struct net_device *dev, wrqu->ap_addr.sa_data[5] & 0xff); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -6851,7 +6951,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev, int length = 0; int err = 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -6888,7 +6988,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev, err = ipw2100_set_essid(priv, essid, length, 0); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -6969,7 +7069,7 @@ static int ipw2100_wx_set_rate(struct net_device *dev, u32 rate; int err = 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -6996,7 +7096,7 @@ static int ipw2100_wx_set_rate(struct net_device *dev, IPW_DEBUG_WX("SET Rate -> %04X \n", rate); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7016,7 +7116,7 @@ static int ipw2100_wx_get_rate(struct net_device *dev, return 0; } - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7048,7 +7148,7 @@ static int ipw2100_wx_get_rate(struct net_device *dev, IPW_DEBUG_WX("GET Rate -> %d \n", wrqu->bitrate.value); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7063,7 +7163,7 @@ static int ipw2100_wx_set_rts(struct net_device *dev, if (wrqu->rts.fixed == 0) return -EINVAL; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7083,7 +7183,7 @@ static int ipw2100_wx_set_rts(struct net_device *dev, IPW_DEBUG_WX("SET RTS Threshold -> 0x%08X \n", value); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7134,7 +7234,7 @@ static int ipw2100_wx_set_txpow(struct net_device *dev, value = wrqu->txpower.value; } - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7145,7 +7245,7 @@ static int ipw2100_wx_set_txpow(struct net_device *dev, IPW_DEBUG_WX("SET TX Power -> %d \n", value); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7237,7 +7337,7 @@ static int ipw2100_wx_set_retry(struct net_device *dev, if (!(wrqu->retry.flags & IW_RETRY_LIMIT)) return 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7264,7 +7364,7 @@ static int ipw2100_wx_set_retry(struct net_device *dev, IPW_DEBUG_WX("SET Both Retry Limits -> %d \n", wrqu->retry.value); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7307,7 +7407,7 @@ static int ipw2100_wx_set_scan(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); int err = 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7322,7 +7422,7 @@ static int ipw2100_wx_set_scan(struct net_device *dev, } done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7372,7 +7472,7 @@ static int ipw2100_wx_set_power(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); int err = 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7405,7 +7505,7 @@ static int ipw2100_wx_set_power(struct net_device *dev, IPW_DEBUG_WX("SET Power Management Mode -> 0x%02X\n", priv->power_mode); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7709,7 +7809,7 @@ static int ipw2100_wx_set_promisc(struct net_device *dev, int enable = (parms[0] > 0); int err = 0; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7727,7 +7827,7 @@ static int ipw2100_wx_set_promisc(struct net_device *dev, err = ipw2100_switch_mode(priv, priv->last_mode); } done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7750,7 +7850,7 @@ static int ipw2100_wx_set_powermode(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); int err = 0, mode = *(int *)extra; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7762,7 +7862,7 @@ static int ipw2100_wx_set_powermode(struct net_device *dev, if (priv->power_mode != mode) err = ipw2100_set_power_mode(priv, mode); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7814,7 +7914,7 @@ static int ipw2100_wx_set_preamble(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); int err, mode = *(int *)extra; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7832,7 +7932,7 @@ static int ipw2100_wx_set_preamble(struct net_device *dev, err = ipw2100_system_config(priv, 0); done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -7862,7 +7962,7 @@ static int ipw2100_wx_set_crc_check(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); int err, mode = *(int *)extra; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { err = -EIO; goto done; @@ -7879,7 +7979,7 @@ static int ipw2100_wx_set_crc_check(struct net_device *dev, err = 0; done: - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); return err; } @@ -8184,11 +8284,11 @@ static void ipw2100_wx_event_work(struct ipw2100_priv *priv) if (priv->status & STATUS_STOPPING) return; - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); IPW_DEBUG_WX("enter\n"); - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); wrqu.ap_addr.sa_family = ARPHRD_ETHER; @@ -8211,7 +8311,7 @@ static void ipw2100_wx_event_work(struct ipw2100_priv *priv) if (!(priv->status & STATUS_ASSOCIATED)) { IPW_DEBUG_WX("Configuring ESSID\n"); - down(&priv->action_sem); + mutex_lock(&priv->action_mutex); /* This is a disassociation event, so kick the firmware to * look for another AP */ if (priv->config & CFG_STATIC_ESSID) @@ -8219,7 +8319,7 @@ static void ipw2100_wx_event_work(struct ipw2100_priv *priv) 0); else ipw2100_set_essid(priv, NULL, 0, 0); - up(&priv->action_sem); + mutex_unlock(&priv->action_mutex); } wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL); diff --git a/drivers/net/wireless/ipw2100.h b/drivers/net/wireless/ipw2100.h index f6c5144..55b7227 100644 --- a/drivers/net/wireless/ipw2100.h +++ b/drivers/net/wireless/ipw2100.h @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright(c) 2003 - 2004 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -41,7 +41,12 @@ #include <net/ieee80211.h> +#ifdef CONFIG_IPW2100_MONITOR +#include <net/ieee80211_radiotap.h> +#endif + #include <linux/workqueue.h> +#include <linux/mutex.h> struct ipw2100_priv; struct ipw2100_tx_packet; @@ -392,8 +397,10 @@ struct ipw2100_notification { #define IPW_WEP104_CIPHER (1<<5) #define IPW_CKIP_CIPHER (1<<6) -#define IPW_AUTH_OPEN 0 -#define IPW_AUTH_SHARED 1 +#define IPW_AUTH_OPEN 0 +#define IPW_AUTH_SHARED 1 +#define IPW_AUTH_LEAP 2 +#define IPW_AUTH_LEAP_CISCO_ID 0x80 struct statistic { int value; @@ -588,8 +595,8 @@ struct ipw2100_priv { int inta_other; spinlock_t low_lock; - struct semaphore action_sem; - struct semaphore adapter_sem; + struct mutex action_mutex; + struct mutex adapter_mutex; wait_queue_head_t wait_command_queue; }; diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 287676a..9dce522 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved. 802.11 status code portion of this file from ethereal-0.10.6: Copyright 2000, Axis Communications AB @@ -33,9 +33,9 @@ #include "ipw2200.h" #include <linux/version.h> -#define IPW2200_VERSION "git-1.0.8" +#define IPW2200_VERSION "git-1.1.1" #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2200/2915 Network Driver" -#define DRV_COPYRIGHT "Copyright(c) 2003-2005 Intel Corporation" +#define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" #define DRV_VERSION IPW2200_VERSION #define ETH_P_80211_STATS (ETH_P_80211_RAW + 1) @@ -55,7 +55,9 @@ static int associate = 1; static int auto_create = 1; static int led = 0; static int disable = 0; -static int hwcrypto = 1; +static int bt_coexist = 0; +static int hwcrypto = 0; +static int roaming = 1; static const char ipw_modes[] = { 'a', 'b', 'g', '?' }; @@ -151,12 +153,6 @@ static int init_supported_rates(struct ipw_priv *priv, static void ipw_set_hwcrypto_keys(struct ipw_priv *); static void ipw_send_wep_keys(struct ipw_priv *, int); -static int ipw_is_valid_channel(struct ieee80211_device *, u8); -static int ipw_channel_to_index(struct ieee80211_device *, u8); -static u8 ipw_freq_to_channel(struct ieee80211_device *, u32); -static int ipw_set_geo(struct ieee80211_device *, const struct ieee80211_geo *); -static const struct ieee80211_geo *ipw_get_geo(struct ieee80211_device *); - static int snprint_line(char *buf, size_t count, const u8 * data, u32 len, u32 ofs) { @@ -227,12 +223,15 @@ static int snprintk_buf(u8 * output, size_t size, const u8 * data, size_t len) return total; } +/* alias for 32-bit indirect read (for SRAM/reg above 4K), with debug wrapper */ static u32 _ipw_read_reg32(struct ipw_priv *priv, u32 reg); #define ipw_read_reg32(a, b) _ipw_read_reg32(a, b) +/* alias for 8-bit indirect read (for SRAM/reg above 4K), with debug wrapper */ static u8 _ipw_read_reg8(struct ipw_priv *ipw, u32 reg); #define ipw_read_reg8(a, b) _ipw_read_reg8(a, b) +/* 8-bit indirect write (for SRAM/reg above 4K), with debug wrapper */ static void _ipw_write_reg8(struct ipw_priv *priv, u32 reg, u8 value); static inline void ipw_write_reg8(struct ipw_priv *a, u32 b, u8 c) { @@ -241,6 +240,7 @@ static inline void ipw_write_reg8(struct ipw_priv *a, u32 b, u8 c) _ipw_write_reg8(a, b, c); } +/* 16-bit indirect write (for SRAM/reg above 4K), with debug wrapper */ static void _ipw_write_reg16(struct ipw_priv *priv, u32 reg, u16 value); static inline void ipw_write_reg16(struct ipw_priv *a, u32 b, u16 c) { @@ -249,6 +249,7 @@ static inline void ipw_write_reg16(struct ipw_priv *a, u32 b, u16 c) _ipw_write_reg16(a, b, c); } +/* 32-bit indirect write (for SRAM/reg above 4K), with debug wrapper */ static void _ipw_write_reg32(struct ipw_priv *priv, u32 reg, u32 value); static inline void ipw_write_reg32(struct ipw_priv *a, u32 b, u32 c) { @@ -257,48 +258,70 @@ static inline void ipw_write_reg32(struct ipw_priv *a, u32 b, u32 c) _ipw_write_reg32(a, b, c); } +/* 8-bit direct write (low 4K) */ #define _ipw_write8(ipw, ofs, val) writeb((val), (ipw)->hw_base + (ofs)) + +/* 8-bit direct write (for low 4K of SRAM/regs), with debug wrapper */ #define ipw_write8(ipw, ofs, val) \ IPW_DEBUG_IO("%s %d: write_direct8(0x%08X, 0x%08X)\n", __FILE__, __LINE__, (u32)(ofs), (u32)(val)); \ _ipw_write8(ipw, ofs, val) +/* 16-bit direct write (low 4K) */ #define _ipw_write16(ipw, ofs, val) writew((val), (ipw)->hw_base + (ofs)) + +/* 16-bit direct write (for low 4K of SRAM/regs), with debug wrapper */ #define ipw_write16(ipw, ofs, val) \ IPW_DEBUG_IO("%s %d: write_direct16(0x%08X, 0x%08X)\n", __FILE__, __LINE__, (u32)(ofs), (u32)(val)); \ _ipw_write16(ipw, ofs, val) +/* 32-bit direct write (low 4K) */ #define _ipw_write32(ipw, ofs, val) writel((val), (ipw)->hw_base + (ofs)) + +/* 32-bit direct write (for low 4K of SRAM/regs), with debug wrapper */ #define ipw_write32(ipw, ofs, val) \ IPW_DEBUG_IO("%s %d: write_direct32(0x%08X, 0x%08X)\n", __FILE__, __LINE__, (u32)(ofs), (u32)(val)); \ _ipw_write32(ipw, ofs, val) +/* 8-bit direct read (low 4K) */ #define _ipw_read8(ipw, ofs) readb((ipw)->hw_base + (ofs)) + +/* 8-bit direct read (low 4K), with debug wrapper */ static inline u8 __ipw_read8(char *f, u32 l, struct ipw_priv *ipw, u32 ofs) { IPW_DEBUG_IO("%s %d: read_direct8(0x%08X)\n", f, l, (u32) (ofs)); return _ipw_read8(ipw, ofs); } +/* alias to 8-bit direct read (low 4K of SRAM/regs), with debug wrapper */ #define ipw_read8(ipw, ofs) __ipw_read8(__FILE__, __LINE__, ipw, ofs) +/* 16-bit direct read (low 4K) */ #define _ipw_read16(ipw, ofs) readw((ipw)->hw_base + (ofs)) + +/* 16-bit direct read (low 4K), with debug wrapper */ static inline u16 __ipw_read16(char *f, u32 l, struct ipw_priv *ipw, u32 ofs) { IPW_DEBUG_IO("%s %d: read_direct16(0x%08X)\n", f, l, (u32) (ofs)); return _ipw_read16(ipw, ofs); } +/* alias to 16-bit direct read (low 4K of SRAM/regs), with debug wrapper */ #define ipw_read16(ipw, ofs) __ipw_read16(__FILE__, __LINE__, ipw, ofs) +/* 32-bit direct read (low 4K) */ #define _ipw_read32(ipw, ofs) readl((ipw)->hw_base + (ofs)) + +/* 32-bit direct read (low 4K), with debug wrapper */ static inline u32 __ipw_read32(char *f, u32 l, struct ipw_priv *ipw, u32 ofs) { IPW_DEBUG_IO("%s %d: read_direct32(0x%08X)\n", f, l, (u32) (ofs)); return _ipw_read32(ipw, ofs); } +/* alias to 32-bit direct read (low 4K of SRAM/regs), with debug wrapper */ #define ipw_read32(ipw, ofs) __ipw_read32(__FILE__, __LINE__, ipw, ofs) +/* multi-byte read (above 4K), with debug wrapper */ static void _ipw_read_indirect(struct ipw_priv *, u32, u8 *, int); static inline void __ipw_read_indirect(const char *f, int l, struct ipw_priv *a, u32 b, u8 * c, int d) @@ -308,15 +331,17 @@ static inline void __ipw_read_indirect(const char *f, int l, _ipw_read_indirect(a, b, c, d); } +/* alias to multi-byte read (SRAM/regs above 4K), with debug wrapper */ #define ipw_read_indirect(a, b, c, d) __ipw_read_indirect(__FILE__, __LINE__, a, b, c, d) +/* alias to multi-byte read (SRAM/regs above 4K), with debug wrapper */ static void _ipw_write_indirect(struct ipw_priv *priv, u32 addr, u8 * data, int num); #define ipw_write_indirect(a, b, c, d) \ IPW_DEBUG_IO("%s %d: write_indirect(0x%08X) %d bytes\n", __FILE__, __LINE__, (u32)(b), d); \ _ipw_write_indirect(a, b, c, d) -/* indirect write s */ +/* 32-bit indirect write (above 4K) */ static void _ipw_write_reg32(struct ipw_priv *priv, u32 reg, u32 value) { IPW_DEBUG_IO(" %p : reg = 0x%8X : value = 0x%8X\n", priv, reg, value); @@ -324,22 +349,29 @@ static void _ipw_write_reg32(struct ipw_priv *priv, u32 reg, u32 value) _ipw_write32(priv, IPW_INDIRECT_DATA, value); } +/* 8-bit indirect write (above 4K) */ static void _ipw_write_reg8(struct ipw_priv *priv, u32 reg, u8 value) { + u32 aligned_addr = reg & IPW_INDIRECT_ADDR_MASK; /* dword align */ + u32 dif_len = reg - aligned_addr; + IPW_DEBUG_IO(" reg = 0x%8X : value = 0x%8X\n", reg, value); - _ipw_write32(priv, IPW_INDIRECT_ADDR, reg & IPW_INDIRECT_ADDR_MASK); - _ipw_write8(priv, IPW_INDIRECT_DATA, value); + _ipw_write32(priv, IPW_INDIRECT_ADDR, aligned_addr); + _ipw_write8(priv, IPW_INDIRECT_DATA + dif_len, value); } +/* 16-bit indirect write (above 4K) */ static void _ipw_write_reg16(struct ipw_priv *priv, u32 reg, u16 value) { + u32 aligned_addr = reg & IPW_INDIRECT_ADDR_MASK; /* dword align */ + u32 dif_len = (reg - aligned_addr) & (~0x1ul); + IPW_DEBUG_IO(" reg = 0x%8X : value = 0x%8X\n", reg, value); - _ipw_write32(priv, IPW_INDIRECT_ADDR, reg & IPW_INDIRECT_ADDR_MASK); - _ipw_write16(priv, IPW_INDIRECT_DATA, value); + _ipw_write32(priv, IPW_INDIRECT_ADDR, aligned_addr); + _ipw_write16(priv, IPW_INDIRECT_DATA + dif_len, value); } -/* indirect read s */ - +/* 8-bit indirect read (above 4K) */ static u8 _ipw_read_reg8(struct ipw_priv *priv, u32 reg) { u32 word; @@ -349,6 +381,7 @@ static u8 _ipw_read_reg8(struct ipw_priv *priv, u32 reg) return (word >> ((reg & 0x3) * 8)) & 0xff; } +/* 32-bit indirect read (above 4K) */ static u32 _ipw_read_reg32(struct ipw_priv *priv, u32 reg) { u32 value; @@ -361,11 +394,12 @@ static u32 _ipw_read_reg32(struct ipw_priv *priv, u32 reg) return value; } -/* iterative/auto-increment 32 bit reads and writes */ +/* General purpose, no alignment requirement, iterative (multi-byte) read, */ +/* for area above 1st 4K of SRAM/reg space */ static void _ipw_read_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, int num) { - u32 aligned_addr = addr & IPW_INDIRECT_ADDR_MASK; + u32 aligned_addr = addr & IPW_INDIRECT_ADDR_MASK; /* dword align */ u32 dif_len = addr - aligned_addr; u32 i; @@ -375,7 +409,7 @@ static void _ipw_read_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, return; } - /* Read the first nibble byte by byte */ + /* Read the first dword (or portion) byte by byte */ if (unlikely(dif_len)) { _ipw_write32(priv, IPW_INDIRECT_ADDR, aligned_addr); /* Start reading at aligned_addr + dif_len */ @@ -384,11 +418,12 @@ static void _ipw_read_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, aligned_addr += 4; } + /* Read all of the middle dwords as dwords, with auto-increment */ _ipw_write32(priv, IPW_AUTOINC_ADDR, aligned_addr); for (; num >= 4; buf += 4, aligned_addr += 4, num -= 4) *(u32 *) buf = _ipw_read32(priv, IPW_AUTOINC_DATA); - /* Copy the last nibble */ + /* Read the last dword (or portion) byte by byte */ if (unlikely(num)) { _ipw_write32(priv, IPW_INDIRECT_ADDR, aligned_addr); for (i = 0; num > 0; i++, num--) @@ -396,10 +431,12 @@ static void _ipw_read_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, } } +/* General purpose, no alignment requirement, iterative (multi-byte) write, */ +/* for area above 1st 4K of SRAM/reg space */ static void _ipw_write_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, int num) { - u32 aligned_addr = addr & IPW_INDIRECT_ADDR_MASK; + u32 aligned_addr = addr & IPW_INDIRECT_ADDR_MASK; /* dword align */ u32 dif_len = addr - aligned_addr; u32 i; @@ -409,20 +446,21 @@ static void _ipw_write_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, return; } - /* Write the first nibble byte by byte */ + /* Write the first dword (or portion) byte by byte */ if (unlikely(dif_len)) { _ipw_write32(priv, IPW_INDIRECT_ADDR, aligned_addr); - /* Start reading at aligned_addr + dif_len */ + /* Start writing at aligned_addr + dif_len */ for (i = dif_len; ((i < 4) && (num > 0)); i++, num--, buf++) _ipw_write8(priv, IPW_INDIRECT_DATA + i, *buf); aligned_addr += 4; } + /* Write all of the middle dwords as dwords, with auto-increment */ _ipw_write32(priv, IPW_AUTOINC_ADDR, aligned_addr); for (; num >= 4; buf += 4, aligned_addr += 4, num -= 4) _ipw_write32(priv, IPW_AUTOINC_DATA, *(u32 *) buf); - /* Copy the last nibble */ + /* Write the last dword (or portion) byte by byte */ if (unlikely(num)) { _ipw_write32(priv, IPW_INDIRECT_ADDR, aligned_addr); for (i = 0; num > 0; i++, num--, buf++) @@ -430,17 +468,21 @@ static void _ipw_write_indirect(struct ipw_priv *priv, u32 addr, u8 * buf, } } +/* General purpose, no alignment requirement, iterative (multi-byte) write, */ +/* for 1st 4K of SRAM/regs space */ static void ipw_write_direct(struct ipw_priv *priv, u32 addr, void *buf, int num) { memcpy_toio((priv->hw_base + addr), buf, num); } +/* Set bit(s) in low 4K of SRAM/regs */ static inline void ipw_set_bit(struct ipw_priv *priv, u32 reg, u32 mask) { ipw_write32(priv, reg, ipw_read32(priv, reg) | mask); } +/* Clear bit(s) in low 4K of SRAM/regs */ static inline void ipw_clear_bit(struct ipw_priv *priv, u32 reg, u32 mask) { ipw_write32(priv, reg, ipw_read32(priv, reg) & ~mask); @@ -701,7 +743,7 @@ static void ipw_init_ordinals(struct ipw_priv *priv) } -u32 ipw_register_toggle(u32 reg) +static u32 ipw_register_toggle(u32 reg) { reg &= ~IPW_START_STANDBY; if (reg & IPW_GATE_ODMA) @@ -722,11 +764,11 @@ u32 ipw_register_toggle(u32 reg) * - On radio OFF, turn off any LEDs started during radio on * */ -#define LD_TIME_LINK_ON 300 -#define LD_TIME_LINK_OFF 2700 -#define LD_TIME_ACT_ON 250 +#define LD_TIME_LINK_ON msecs_to_jiffies(300) +#define LD_TIME_LINK_OFF msecs_to_jiffies(2700) +#define LD_TIME_ACT_ON msecs_to_jiffies(250) -void ipw_led_link_on(struct ipw_priv *priv) +static void ipw_led_link_on(struct ipw_priv *priv) { unsigned long flags; u32 led; @@ -764,12 +806,12 @@ void ipw_led_link_on(struct ipw_priv *priv) static void ipw_bg_led_link_on(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_led_link_on(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } -void ipw_led_link_off(struct ipw_priv *priv) +static void ipw_led_link_off(struct ipw_priv *priv) { unsigned long flags; u32 led; @@ -808,9 +850,9 @@ void ipw_led_link_off(struct ipw_priv *priv) static void ipw_bg_led_link_off(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_led_link_off(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static void __ipw_led_activity_on(struct ipw_priv *priv) @@ -847,6 +889,7 @@ static void __ipw_led_activity_on(struct ipw_priv *priv) } } +#if 0 void ipw_led_activity_on(struct ipw_priv *priv) { unsigned long flags; @@ -854,8 +897,9 @@ void ipw_led_activity_on(struct ipw_priv *priv) __ipw_led_activity_on(priv); spin_unlock_irqrestore(&priv->lock, flags); } +#endif /* 0 */ -void ipw_led_activity_off(struct ipw_priv *priv) +static void ipw_led_activity_off(struct ipw_priv *priv) { unsigned long flags; u32 led; @@ -885,12 +929,12 @@ void ipw_led_activity_off(struct ipw_priv *priv) static void ipw_bg_led_activity_off(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_led_activity_off(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } -void ipw_led_band_on(struct ipw_priv *priv) +static void ipw_led_band_on(struct ipw_priv *priv) { unsigned long flags; u32 led; @@ -925,7 +969,7 @@ void ipw_led_band_on(struct ipw_priv *priv) spin_unlock_irqrestore(&priv->lock, flags); } -void ipw_led_band_off(struct ipw_priv *priv) +static void ipw_led_band_off(struct ipw_priv *priv) { unsigned long flags; u32 led; @@ -948,24 +992,24 @@ void ipw_led_band_off(struct ipw_priv *priv) spin_unlock_irqrestore(&priv->lock, flags); } -void ipw_led_radio_on(struct ipw_priv *priv) +static void ipw_led_radio_on(struct ipw_priv *priv) { ipw_led_link_on(priv); } -void ipw_led_radio_off(struct ipw_priv *priv) +static void ipw_led_radio_off(struct ipw_priv *priv) { ipw_led_activity_off(priv); ipw_led_link_off(priv); } -void ipw_led_link_up(struct ipw_priv *priv) +static void ipw_led_link_up(struct ipw_priv *priv) { /* Set the Link Led on for all nic types */ ipw_led_link_on(priv); } -void ipw_led_link_down(struct ipw_priv *priv) +static void ipw_led_link_down(struct ipw_priv *priv) { ipw_led_activity_off(priv); ipw_led_link_off(priv); @@ -974,7 +1018,7 @@ void ipw_led_link_down(struct ipw_priv *priv) ipw_led_radio_off(priv); } -void ipw_led_init(struct ipw_priv *priv) +static void ipw_led_init(struct ipw_priv *priv) { priv->nic_type = priv->eeprom[EEPROM_NIC_TYPE]; @@ -1025,7 +1069,7 @@ void ipw_led_init(struct ipw_priv *priv) } } -void ipw_led_shutdown(struct ipw_priv *priv) +static void ipw_led_shutdown(struct ipw_priv *priv) { ipw_led_activity_off(priv); ipw_led_link_off(priv); @@ -1074,6 +1118,7 @@ static DRIVER_ATTR(debug_level, S_IWUSR | S_IRUGO, static inline u32 ipw_get_event_log_len(struct ipw_priv *priv) { + /* length = 1st dword in log */ return ipw_read_reg32(priv, ipw_read32(priv, IPW_EVENT_LOG)); } @@ -1603,7 +1648,7 @@ static ssize_t store_speed_scan(struct device *d, struct device_attribute *attr, break; } - if (ipw_is_valid_channel(priv->ieee, channel)) + if (ieee80211_is_valid_channel(priv->ieee, channel)) priv->speed_scan[pos++] = channel; else IPW_WARNING("Skipping invalid channel request: %d\n", @@ -1751,9 +1796,9 @@ static void ipw_irq_tasklet(struct ipw_priv *priv) } if (inta & IPW_INTA_BIT_FATAL_ERROR) { - IPW_ERROR("Firmware error detected. Restarting.\n"); + IPW_WARNING("Firmware error detected. Restarting.\n"); if (priv->error) { - IPW_ERROR("Sysfs 'error' log already exists.\n"); + IPW_DEBUG_FW("Sysfs 'error' log already exists.\n"); #ifdef CONFIG_IPW2200_DEBUG if (ipw_debug_level & IPW_DL_FW_ERRORS) { struct ipw_fw_error *error = @@ -1766,10 +1811,10 @@ static void ipw_irq_tasklet(struct ipw_priv *priv) } else { priv->error = ipw_alloc_error_log(priv); if (priv->error) - IPW_ERROR("Sysfs 'error' log captured.\n"); + IPW_DEBUG_FW("Sysfs 'error' log captured.\n"); else - IPW_ERROR("Error allocating sysfs 'error' " - "log.\n"); + IPW_DEBUG_FW("Error allocating sysfs 'error' " + "log.\n"); #ifdef CONFIG_IPW2200_DEBUG if (ipw_debug_level & IPW_DL_FW_ERRORS) ipw_dump_error_log(priv, priv->error); @@ -1870,7 +1915,8 @@ static char *get_cmd_string(u8 cmd) } #define HOST_COMPLETE_TIMEOUT HZ -static int ipw_send_cmd(struct ipw_priv *priv, struct host_cmd *cmd) + +static int __ipw_send_cmd(struct ipw_priv *priv, struct host_cmd *cmd) { int rc = 0; unsigned long flags; @@ -1897,9 +1943,15 @@ static int ipw_send_cmd(struct ipw_priv *priv, struct host_cmd *cmd) IPW_DEBUG_HC("%s command (#%d) %d bytes: 0x%08X\n", get_cmd_string(cmd->cmd), cmd->cmd, cmd->len, priv->status); - printk_buf(IPW_DL_HOST_COMMAND, (u8 *) cmd->param, cmd->len); - rc = ipw_queue_tx_hcmd(priv, cmd->cmd, &cmd->param, cmd->len, 0); +#ifndef DEBUG_CMD_WEP_KEY + if (cmd->cmd == IPW_CMD_WEP_KEY) + IPW_DEBUG_HC("WEP_KEY command masked out for secure.\n"); + else +#endif + printk_buf(IPW_DL_HOST_COMMAND, (u8 *) cmd->param, cmd->len); + + rc = ipw_queue_tx_hcmd(priv, cmd->cmd, cmd->param, cmd->len, 0); if (rc) { priv->status &= ~STATUS_HCMD_ACTIVE; IPW_ERROR("Failed to send %s: Reason %d\n", @@ -1942,61 +1994,62 @@ static int ipw_send_cmd(struct ipw_priv *priv, struct host_cmd *cmd) return rc; } -static int ipw_send_host_complete(struct ipw_priv *priv) +static int ipw_send_cmd_simple(struct ipw_priv *priv, u8 command) +{ + struct host_cmd cmd = { + .cmd = command, + }; + + return __ipw_send_cmd(priv, &cmd); +} + +static int ipw_send_cmd_pdu(struct ipw_priv *priv, u8 command, u8 len, + void *data) { struct host_cmd cmd = { - .cmd = IPW_CMD_HOST_COMPLETE, - .len = 0 + .cmd = command, + .len = len, + .param = data, }; + return __ipw_send_cmd(priv, &cmd); +} + +static int ipw_send_host_complete(struct ipw_priv *priv) +{ if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_simple(priv, IPW_CMD_HOST_COMPLETE); } static int ipw_send_system_config(struct ipw_priv *priv, struct ipw_sys_config *config) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SYSTEM_CONFIG, - .len = sizeof(*config) - }; - if (!priv || !config) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, config, sizeof(*config)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_SYSTEM_CONFIG, sizeof(*config), + config); } static int ipw_send_ssid(struct ipw_priv *priv, u8 * ssid, int len) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SSID, - .len = min(len, IW_ESSID_MAX_SIZE) - }; - if (!priv || !ssid) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, ssid, cmd.len); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_SSID, min(len, IW_ESSID_MAX_SIZE), + ssid); } static int ipw_send_adapter_address(struct ipw_priv *priv, u8 * mac) { - struct host_cmd cmd = { - .cmd = IPW_CMD_ADAPTER_ADDRESS, - .len = ETH_ALEN - }; - if (!priv || !mac) { IPW_ERROR("Invalid args\n"); return -1; @@ -2005,8 +2058,7 @@ static int ipw_send_adapter_address(struct ipw_priv *priv, u8 * mac) IPW_DEBUG_INFO("%s: Setting MAC to " MAC_FMT "\n", priv->net_dev->name, MAC_ARG(mac)); - memcpy(cmd.param, mac, ETH_ALEN); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_ADAPTER_ADDRESS, ETH_ALEN, mac); } /* @@ -2036,9 +2088,9 @@ static void ipw_adapter_restart(void *adapter) static void ipw_bg_adapter_restart(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_adapter_restart(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } #define IPW_SCAN_CHECK_WATCHDOG (5 * HZ) @@ -2048,8 +2100,8 @@ static void ipw_scan_check(void *data) struct ipw_priv *priv = data; if (priv->status & (STATUS_SCANNING | STATUS_SCAN_ABORTING)) { IPW_DEBUG_SCAN("Scan completion watchdog resetting " - "adapter (%dms).\n", - IPW_SCAN_CHECK_WATCHDOG / 100); + "adapter after (%dms).\n", + jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG)); queue_work(priv->workqueue, &priv->adapter_restart); } } @@ -2057,59 +2109,48 @@ static void ipw_scan_check(void *data) static void ipw_bg_scan_check(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_scan_check(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static int ipw_send_scan_request_ext(struct ipw_priv *priv, struct ipw_scan_request_ext *request) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SCAN_REQUEST_EXT, - .len = sizeof(*request) - }; - - memcpy(cmd.param, request, sizeof(*request)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_SCAN_REQUEST_EXT, + sizeof(*request), request); } static int ipw_send_scan_abort(struct ipw_priv *priv) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SCAN_ABORT, - .len = 0 - }; - if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_simple(priv, IPW_CMD_SCAN_ABORT); } static int ipw_set_sensitivity(struct ipw_priv *priv, u16 sens) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SENSITIVITY_CALIB, - .len = sizeof(struct ipw_sensitivity_calib) + struct ipw_sensitivity_calib calib = { + .beacon_rssi_raw = sens, }; - struct ipw_sensitivity_calib *calib = (struct ipw_sensitivity_calib *) - &cmd.param; - calib->beacon_rssi_raw = sens; - return ipw_send_cmd(priv, &cmd); + + return ipw_send_cmd_pdu(priv, IPW_CMD_SENSITIVITY_CALIB, sizeof(calib), + &calib); } static int ipw_send_associate(struct ipw_priv *priv, struct ipw_associate *associate) { - struct host_cmd cmd = { - .cmd = IPW_CMD_ASSOCIATE, - .len = sizeof(*associate) - }; - struct ipw_associate tmp_associate; + + if (!priv || !associate) { + IPW_ERROR("Invalid args\n"); + return -1; + } + memcpy(&tmp_associate, associate, sizeof(*associate)); tmp_associate.policy_support = cpu_to_le16(tmp_associate.policy_support); @@ -2122,85 +2163,60 @@ static int ipw_send_associate(struct ipw_priv *priv, cpu_to_le16(tmp_associate.beacon_interval); tmp_associate.atim_window = cpu_to_le16(tmp_associate.atim_window); - if (!priv || !associate) { - IPW_ERROR("Invalid args\n"); - return -1; - } - - memcpy(cmd.param, &tmp_associate, sizeof(*associate)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_ASSOCIATE, sizeof(tmp_associate), + &tmp_associate); } static int ipw_send_supported_rates(struct ipw_priv *priv, struct ipw_supported_rates *rates) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SUPPORTED_RATES, - .len = sizeof(*rates) - }; - if (!priv || !rates) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, rates, sizeof(*rates)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_SUPPORTED_RATES, sizeof(*rates), + rates); } static int ipw_set_random_seed(struct ipw_priv *priv) { - struct host_cmd cmd = { - .cmd = IPW_CMD_SEED_NUMBER, - .len = sizeof(u32) - }; + u32 val; if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - get_random_bytes(&cmd.param, sizeof(u32)); + get_random_bytes(&val, sizeof(val)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_SEED_NUMBER, sizeof(val), &val); } static int ipw_send_card_disable(struct ipw_priv *priv, u32 phy_off) { - struct host_cmd cmd = { - .cmd = IPW_CMD_CARD_DISABLE, - .len = sizeof(u32) - }; - if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - *((u32 *) & cmd.param) = phy_off; - - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_CARD_DISABLE, sizeof(phy_off), + &phy_off); } static int ipw_send_tx_power(struct ipw_priv *priv, struct ipw_tx_power *power) { - struct host_cmd cmd = { - .cmd = IPW_CMD_TX_POWER, - .len = sizeof(*power) - }; - if (!priv || !power) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, power, sizeof(*power)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_TX_POWER, sizeof(*power), power); } static int ipw_set_tx_power(struct ipw_priv *priv) { - const struct ieee80211_geo *geo = ipw_get_geo(priv->ieee); + const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); struct ipw_tx_power tx_power; s8 max_power; int i; @@ -2247,18 +2263,14 @@ static int ipw_send_rts_threshold(struct ipw_priv *priv, u16 rts) struct ipw_rts_threshold rts_threshold = { .rts_threshold = rts, }; - struct host_cmd cmd = { - .cmd = IPW_CMD_RTS_THRESHOLD, - .len = sizeof(rts_threshold) - }; if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, &rts_threshold, sizeof(rts_threshold)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_RTS_THRESHOLD, + sizeof(rts_threshold), &rts_threshold); } static int ipw_send_frag_threshold(struct ipw_priv *priv, u16 frag) @@ -2266,27 +2278,19 @@ static int ipw_send_frag_threshold(struct ipw_priv *priv, u16 frag) struct ipw_frag_threshold frag_threshold = { .frag_threshold = frag, }; - struct host_cmd cmd = { - .cmd = IPW_CMD_FRAG_THRESHOLD, - .len = sizeof(frag_threshold) - }; if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, &frag_threshold, sizeof(frag_threshold)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_FRAG_THRESHOLD, + sizeof(frag_threshold), &frag_threshold); } static int ipw_send_power_mode(struct ipw_priv *priv, u32 mode) { - struct host_cmd cmd = { - .cmd = IPW_CMD_POWER_MODE, - .len = sizeof(u32) - }; - u32 *param = (u32 *) (&cmd.param); + u32 param; if (!priv) { IPW_ERROR("Invalid args\n"); @@ -2297,17 +2301,18 @@ static int ipw_send_power_mode(struct ipw_priv *priv, u32 mode) * level */ switch (mode) { case IPW_POWER_BATTERY: - *param = IPW_POWER_INDEX_3; + param = IPW_POWER_INDEX_3; break; case IPW_POWER_AC: - *param = IPW_POWER_MODE_CAM; + param = IPW_POWER_MODE_CAM; break; default: - *param = mode; + param = mode; break; } - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_POWER_MODE, sizeof(param), + ¶m); } static int ipw_send_retry_limit(struct ipw_priv *priv, u8 slimit, u8 llimit) @@ -2316,18 +2321,14 @@ static int ipw_send_retry_limit(struct ipw_priv *priv, u8 slimit, u8 llimit) .short_retry_limit = slimit, .long_retry_limit = llimit }; - struct host_cmd cmd = { - .cmd = IPW_CMD_RETRY_LIMIT, - .len = sizeof(retry_limit) - }; if (!priv) { IPW_ERROR("Invalid args\n"); return -1; } - memcpy(cmd.param, &retry_limit, sizeof(retry_limit)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_RETRY_LIMIT, sizeof(retry_limit), + &retry_limit); } /* @@ -2454,7 +2455,7 @@ static void ipw_eeprom_init_sram(struct ipw_priv *priv) /* If the data looks correct, then copy it to our private copy. Otherwise let the firmware know to perform the operation - on it's own + on its own. */ if (priv->eeprom[EEPROM_VERSION] != 0) { IPW_DEBUG_INFO("Writing EEPROM data into SRAM\n"); @@ -2707,22 +2708,25 @@ static int ipw_fw_dma_add_buffer(struct ipw_priv *priv, static int ipw_fw_dma_wait(struct ipw_priv *priv) { - u32 current_index = 0; + u32 current_index = 0, previous_index; u32 watchdog = 0; IPW_DEBUG_FW(">> : \n"); current_index = ipw_fw_dma_command_block_index(priv); - IPW_DEBUG_FW_INFO("sram_desc.last_cb_index:0x%8X\n", + IPW_DEBUG_FW_INFO("sram_desc.last_cb_index:0x%08X\n", (int)priv->sram_desc.last_cb_index); while (current_index < priv->sram_desc.last_cb_index) { udelay(50); + previous_index = current_index; current_index = ipw_fw_dma_command_block_index(priv); - watchdog++; - - if (watchdog > 400) { + if (previous_index < current_index) { + watchdog = 0; + continue; + } + if (++watchdog > 400) { IPW_DEBUG_FW_INFO("Timeout\n"); ipw_fw_dma_dump_command_block(priv); ipw_fw_dma_abort(priv); @@ -2772,6 +2776,7 @@ static inline int ipw_alive(struct ipw_priv *priv) return ipw_read32(priv, 0x90) == 0xd55555d5; } +/* timeout in msec, attempted in 10-msec quanta */ static int ipw_poll_bit(struct ipw_priv *priv, u32 addr, u32 mask, int timeout) { @@ -2800,10 +2805,11 @@ static int ipw_stop_master(struct ipw_priv *priv) /* stop master. typical delay - 0 */ ipw_set_bit(priv, IPW_RESET_REG, IPW_RESET_REG_STOP_MASTER); + /* timeout is in msec, polled in 10-msec quanta */ rc = ipw_poll_bit(priv, IPW_RESET_REG, IPW_RESET_REG_MASTER_DISABLED, 100); if (rc < 0) { - IPW_ERROR("stop master failed in 10ms\n"); + IPW_ERROR("wait for stop master failed after 100ms\n"); return -1; } @@ -2823,33 +2829,11 @@ static void ipw_arc_release(struct ipw_priv *priv) mdelay(5); } -struct fw_header { - u32 version; - u32 mode; -}; - struct fw_chunk { u32 address; u32 length; }; -#define IPW_FW_MAJOR_VERSION 2 -#define IPW_FW_MINOR_VERSION 4 - -#define IPW_FW_MINOR(x) ((x & 0xff) >> 8) -#define IPW_FW_MAJOR(x) (x & 0xff) - -#define IPW_FW_VERSION ((IPW_FW_MINOR_VERSION << 8) | IPW_FW_MAJOR_VERSION) - -#define IPW_FW_PREFIX "ipw-" __stringify(IPW_FW_MAJOR_VERSION) \ -"." __stringify(IPW_FW_MINOR_VERSION) "-" - -#if IPW_FW_MAJOR_VERSION >= 2 && IPW_FW_MINOR_VERSION > 0 -#define IPW_FW_NAME(x) IPW_FW_PREFIX "" x ".fw" -#else -#define IPW_FW_NAME(x) "ipw2200_" x ".fw" -#endif - static int ipw_load_ucode(struct ipw_priv *priv, u8 * data, size_t len) { int rc = 0, i, addr; @@ -2890,8 +2874,8 @@ static int ipw_load_ucode(struct ipw_priv *priv, u8 * data, size_t len) mdelay(1); /* enable ucode store */ - ipw_write_reg8(priv, DINO_CONTROL_REG, 0x0); - ipw_write_reg8(priv, DINO_CONTROL_REG, DINO_ENABLE_CS); + ipw_write_reg8(priv, IPW_BASEBAND_CONTROL_STATUS, 0x0); + ipw_write_reg8(priv, IPW_BASEBAND_CONTROL_STATUS, DINO_ENABLE_CS); mdelay(1); /* write ucode */ @@ -3036,7 +3020,7 @@ static int ipw_stop_nic(struct ipw_priv *priv) rc = ipw_poll_bit(priv, IPW_RESET_REG, IPW_RESET_REG_MASTER_DISABLED, 500); if (rc < 0) { - IPW_ERROR("wait for reg master disabled failed\n"); + IPW_ERROR("wait for reg master disabled failed after 500ms\n"); return rc; } @@ -3118,33 +3102,47 @@ static int ipw_reset_nic(struct ipw_priv *priv) return rc; } + +struct ipw_fw { + u32 ver; + u32 boot_size; + u32 ucode_size; + u32 fw_size; + u8 data[0]; +}; + static int ipw_get_fw(struct ipw_priv *priv, - const struct firmware **fw, const char *name) + const struct firmware **raw, const char *name) { - struct fw_header *header; + struct ipw_fw *fw; int rc; /* ask firmware_class module to get the boot firmware off disk */ - rc = request_firmware(fw, name, &priv->pci_dev->dev); + rc = request_firmware(raw, name, &priv->pci_dev->dev); if (rc < 0) { - IPW_ERROR("%s load failed: Reason %d\n", name, rc); + IPW_ERROR("%s request_firmware failed: Reason %d\n", name, rc); return rc; } - header = (struct fw_header *)(*fw)->data; - if (IPW_FW_MAJOR(le32_to_cpu(header->version)) != IPW_FW_MAJOR_VERSION) { - IPW_ERROR("'%s' firmware version not compatible (%d != %d)\n", - name, - IPW_FW_MAJOR(le32_to_cpu(header->version)), - IPW_FW_MAJOR_VERSION); + if ((*raw)->size < sizeof(*fw)) { + IPW_ERROR("%s is too small (%zd)\n", name, (*raw)->size); + return -EINVAL; + } + + fw = (void *)(*raw)->data; + + if ((*raw)->size < sizeof(*fw) + + fw->boot_size + fw->ucode_size + fw->fw_size) { + IPW_ERROR("%s is too small or corrupt (%zd)\n", + name, (*raw)->size); return -EINVAL; } - IPW_DEBUG_INFO("Loading firmware '%s' file v%d.%d (%zd bytes)\n", + IPW_DEBUG_INFO("Read firmware '%s' image v%d.%d (%zd bytes)\n", name, - IPW_FW_MAJOR(le32_to_cpu(header->version)), - IPW_FW_MINOR(le32_to_cpu(header->version)), - (*fw)->size - sizeof(struct fw_header)); + le32_to_cpu(fw->ver) >> 16, + le32_to_cpu(fw->ver) & 0xff, + (*raw)->size - sizeof(*fw)); return 0; } @@ -3184,17 +3182,13 @@ static void ipw_rx_queue_reset(struct ipw_priv *priv, #ifdef CONFIG_PM static int fw_loaded = 0; -static const struct firmware *bootfw = NULL; -static const struct firmware *firmware = NULL; -static const struct firmware *ucode = NULL; +static const struct firmware *raw = NULL; static void free_firmware(void) { if (fw_loaded) { - release_firmware(bootfw); - release_firmware(ucode); - release_firmware(firmware); - bootfw = ucode = firmware = NULL; + release_firmware(raw); + raw = NULL; fw_loaded = 0; } } @@ -3205,60 +3199,50 @@ static void free_firmware(void) static int ipw_load(struct ipw_priv *priv) { #ifndef CONFIG_PM - const struct firmware *bootfw = NULL; - const struct firmware *firmware = NULL; - const struct firmware *ucode = NULL; + const struct firmware *raw = NULL; #endif + struct ipw_fw *fw; + u8 *boot_img, *ucode_img, *fw_img; + u8 *name = NULL; int rc = 0, retries = 3; -#ifdef CONFIG_PM - if (!fw_loaded) { -#endif - rc = ipw_get_fw(priv, &bootfw, IPW_FW_NAME("boot")); - if (rc) - goto error; - - switch (priv->ieee->iw_mode) { - case IW_MODE_ADHOC: - rc = ipw_get_fw(priv, &ucode, - IPW_FW_NAME("ibss_ucode")); - if (rc) - goto error; - - rc = ipw_get_fw(priv, &firmware, IPW_FW_NAME("ibss")); - break; - + switch (priv->ieee->iw_mode) { + case IW_MODE_ADHOC: + name = "ipw2200-ibss.fw"; + break; #ifdef CONFIG_IPW2200_MONITOR - case IW_MODE_MONITOR: - rc = ipw_get_fw(priv, &ucode, - IPW_FW_NAME("sniffer_ucode")); - if (rc) - goto error; - - rc = ipw_get_fw(priv, &firmware, - IPW_FW_NAME("sniffer")); - break; + case IW_MODE_MONITOR: + name = "ipw2200-sniffer.fw"; + break; #endif - case IW_MODE_INFRA: - rc = ipw_get_fw(priv, &ucode, IPW_FW_NAME("bss_ucode")); - if (rc) - goto error; - - rc = ipw_get_fw(priv, &firmware, IPW_FW_NAME("bss")); - break; + case IW_MODE_INFRA: + name = "ipw2200-bss.fw"; + break; + } - default: - rc = -EINVAL; - } + if (!name) { + rc = -EINVAL; + goto error; + } - if (rc) +#ifdef CONFIG_PM + if (!fw_loaded) { +#endif + rc = ipw_get_fw(priv, &raw, name); + if (rc < 0) goto error; - #ifdef CONFIG_PM - fw_loaded = 1; } #endif + fw = (void *)raw->data; + boot_img = &fw->data[0]; + ucode_img = &fw->data[fw->boot_size]; + fw_img = &fw->data[fw->boot_size + fw->ucode_size]; + + if (rc < 0) + goto error; + if (!priv->rxq) priv->rxq = ipw_rx_queue_alloc(priv); else @@ -3279,7 +3263,7 @@ static int ipw_load(struct ipw_priv *priv) ipw_stop_nic(priv); rc = ipw_reset_nic(priv); - if (rc) { + if (rc < 0) { IPW_ERROR("Unable to reset NIC\n"); goto error; } @@ -3288,8 +3272,7 @@ static int ipw_load(struct ipw_priv *priv) IPW_NIC_SRAM_UPPER_BOUND - IPW_NIC_SRAM_LOWER_BOUND); /* DMA the initial boot firmware into the device */ - rc = ipw_load_firmware(priv, bootfw->data + sizeof(struct fw_header), - bootfw->size - sizeof(struct fw_header)); + rc = ipw_load_firmware(priv, boot_img, fw->boot_size); if (rc < 0) { IPW_ERROR("Unable to load boot firmware: %d\n", rc); goto error; @@ -3298,7 +3281,7 @@ static int ipw_load(struct ipw_priv *priv) /* kick start the device */ ipw_start_nic(priv); - /* wait for the device to finish it's initial startup sequence */ + /* wait for the device to finish its initial startup sequence */ rc = ipw_poll_bit(priv, IPW_INTA_RW, IPW_INTA_BIT_FW_INITIALIZATION_DONE, 500); if (rc < 0) { @@ -3311,8 +3294,7 @@ static int ipw_load(struct ipw_priv *priv) ipw_write32(priv, IPW_INTA_RW, IPW_INTA_BIT_FW_INITIALIZATION_DONE); /* DMA the ucode into the device */ - rc = ipw_load_ucode(priv, ucode->data + sizeof(struct fw_header), - ucode->size - sizeof(struct fw_header)); + rc = ipw_load_ucode(priv, ucode_img, fw->ucode_size); if (rc < 0) { IPW_ERROR("Unable to load ucode: %d\n", rc); goto error; @@ -3322,18 +3304,19 @@ static int ipw_load(struct ipw_priv *priv) ipw_stop_nic(priv); /* DMA bss firmware into the device */ - rc = ipw_load_firmware(priv, firmware->data + - sizeof(struct fw_header), - firmware->size - sizeof(struct fw_header)); + rc = ipw_load_firmware(priv, fw_img, fw->fw_size); if (rc < 0) { IPW_ERROR("Unable to load firmware: %d\n", rc); goto error; } +#ifdef CONFIG_PM + fw_loaded = 1; +#endif ipw_write32(priv, IPW_EEPROM_LOAD_DISABLE, 0); rc = ipw_queue_reset(priv); - if (rc) { + if (rc < 0) { IPW_ERROR("Unable to initialize queues\n"); goto error; } @@ -3362,7 +3345,7 @@ static int ipw_load(struct ipw_priv *priv) rc = ipw_poll_bit(priv, IPW_INTA_RW, IPW_INTA_BIT_FW_INITIALIZATION_DONE, 500); if (rc < 0) { - IPW_ERROR("device failed to start after 500ms\n"); + IPW_ERROR("device failed to start within 500ms\n"); goto error; } IPW_DEBUG_INFO("device response after %dms\n", rc); @@ -3386,9 +3369,7 @@ static int ipw_load(struct ipw_priv *priv) ipw_write32(priv, IPW_INTA_RW, IPW_INTA_MASK_ALL); #ifndef CONFIG_PM - release_firmware(bootfw); - release_firmware(ucode); - release_firmware(firmware); + release_firmware(raw); #endif return 0; @@ -3398,15 +3379,11 @@ static int ipw_load(struct ipw_priv *priv) priv->rxq = NULL; } ipw_tx_queue_free(priv); - if (bootfw) - release_firmware(bootfw); - if (ucode) - release_firmware(ucode); - if (firmware) - release_firmware(firmware); + if (raw) + release_firmware(raw); #ifdef CONFIG_PM fw_loaded = 0; - bootfw = ucode = firmware = NULL; + raw = NULL; #endif return rc; @@ -3715,9 +3692,9 @@ static int ipw_disassociate(void *data) static void ipw_bg_disassociate(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_disassociate(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static void ipw_system_config(void *data) @@ -4077,9 +4054,9 @@ static void ipw_gather_stats(struct ipw_priv *priv) static void ipw_bg_gather_stats(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_gather_stats(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } /* Missed beacon behavior: @@ -4121,8 +4098,9 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv, return; } - if (missed_count > priv->roaming_threshold && - missed_count <= priv->disassociate_threshold) { + if (roaming && + (missed_count > priv->roaming_threshold && + missed_count <= priv->disassociate_threshold)) { /* If we are not already roaming, set the ROAM * bit in the status and kick off a scan. * This can happen several times before we reach @@ -4150,7 +4128,6 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv, } IPW_DEBUG_NOTIF("Missed beacon: %d\n", missed_count); - } /** @@ -4527,10 +4504,9 @@ static void ipw_rx_notification(struct ipw_priv *priv, if (notif->size == sizeof(*x)) { IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE, - "link deterioration: '%s' " MAC_FMT - " \n", escape_essid(priv->essid, - priv->essid_len), - MAC_ARG(priv->bssid)); + "link deterioration: type %d, cnt %d\n", + x->silence_notification_type, + x->silence_count); memcpy(&priv->last_link_deterioration, x, sizeof(*x)); } else { @@ -4911,13 +4887,13 @@ static void ipw_rx_queue_replenish(void *data) static void ipw_bg_rx_queue_replenish(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_rx_queue_replenish(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } /* Assumes that the skb field of the buffers in 'pool' is kept accurate. - * If an SKB has been detached, the POOL needs to have it's SKB set to NULL + * If an SKB has been detached, the POOL needs to have its SKB set to NULL * This free routine walks the list of POOL entries and if SKB is set to * non NULL it is unmapped and freed */ @@ -5257,10 +5233,11 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (priv->ieee->scan_age != 0 && time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) { IPW_DEBUG_MERGE("Network '%s (" MAC_FMT ")' excluded " - "because of age: %lums.\n", + "because of age: %ums.\n", escape_essid(network->ssid, network->ssid_len), MAC_ARG(network->bssid), - 1000 * (jiffies - network->last_scanned) / HZ); + jiffies_to_msecs(jiffies - + network->last_scanned)); return 0; } @@ -5369,7 +5346,7 @@ static void ipw_merge_adhoc_network(void *data) return; } - down(&priv->sem); + mutex_lock(&priv->mutex); if ((priv->ieee->iw_mode == IW_MODE_ADHOC)) { IPW_DEBUG_MERGE("remove network %s\n", escape_essid(priv->essid, @@ -5379,7 +5356,7 @@ static void ipw_merge_adhoc_network(void *data) ipw_disassociate(priv); priv->assoc_network = match.network; - up(&priv->sem); + mutex_unlock(&priv->mutex); return; } } @@ -5467,11 +5444,12 @@ static int ipw_best_network(struct ipw_priv *priv, if (network->last_associate && time_after(network->last_associate + (HZ * 3UL), jiffies)) { IPW_DEBUG_ASSOC("Network '%s (" MAC_FMT ")' excluded " - "because of storming (%lus since last " + "because of storming (%ums since last " "assoc attempt).\n", escape_essid(network->ssid, network->ssid_len), MAC_ARG(network->bssid), - (jiffies - network->last_associate) / HZ); + jiffies_to_msecs(jiffies - + network->last_associate)); return 0; } @@ -5479,10 +5457,11 @@ static int ipw_best_network(struct ipw_priv *priv, if (priv->ieee->scan_age != 0 && time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) { IPW_DEBUG_ASSOC("Network '%s (" MAC_FMT ")' excluded " - "because of age: %lums.\n", + "because of age: %ums.\n", escape_essid(network->ssid, network->ssid_len), MAC_ARG(network->bssid), - 1000 * (jiffies - network->last_scanned) / HZ); + jiffies_to_msecs(jiffies - + network->last_scanned)); return 0; } @@ -5510,15 +5489,6 @@ static int ipw_best_network(struct ipw_priv *priv, return 0; } - if (!priv->ieee->wpa_enabled && (network->wpa_ie_len > 0 || - network->rsn_ie_len > 0)) { - IPW_DEBUG_ASSOC("Network '%s (" MAC_FMT ")' excluded " - "because of WPA capability mismatch.\n", - escape_essid(network->ssid, network->ssid_len), - MAC_ARG(network->bssid)); - return 0; - } - if ((priv->config & CFG_STATIC_BSSID) && memcmp(network->bssid, priv->bssid, ETH_ALEN)) { IPW_DEBUG_ASSOC("Network '%s (" MAC_FMT ")' excluded " @@ -5539,7 +5509,7 @@ static int ipw_best_network(struct ipw_priv *priv, } /* Filter out invalid channel in current GEO */ - if (!ipw_is_valid_channel(priv->ieee, network->channel)) { + if (!ieee80211_is_valid_channel(priv->ieee, network->channel)) { IPW_DEBUG_ASSOC("Network '%s (" MAC_FMT ")' excluded " "because of invalid channel in current GEO\n", escape_essid(network->ssid, network->ssid_len), @@ -5584,7 +5554,7 @@ static int ipw_best_network(struct ipw_priv *priv, static void ipw_adhoc_create(struct ipw_priv *priv, struct ieee80211_network *network) { - const struct ieee80211_geo *geo = ipw_get_geo(priv->ieee); + const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); int i; /* @@ -5599,10 +5569,10 @@ static void ipw_adhoc_create(struct ipw_priv *priv, * FW fatal error. * */ - switch (ipw_is_valid_channel(priv->ieee, priv->channel)) { + switch (ieee80211_is_valid_channel(priv->ieee, priv->channel)) { case IEEE80211_52GHZ_BAND: network->mode = IEEE_A; - i = ipw_channel_to_index(priv->ieee, priv->channel); + i = ieee80211_channel_to_index(priv->ieee, priv->channel); if (i == -1) BUG(); if (geo->a[i].flags & IEEE80211_CH_PASSIVE_ONLY) { @@ -5616,7 +5586,7 @@ static void ipw_adhoc_create(struct ipw_priv *priv, network->mode = IEEE_G; else network->mode = IEEE_B; - i = ipw_channel_to_index(priv->ieee, priv->channel); + i = ieee80211_channel_to_index(priv->ieee, priv->channel); if (i == -1) BUG(); if (geo->bg[i].flags & IEEE80211_CH_PASSIVE_ONLY) { @@ -5671,54 +5641,44 @@ static void ipw_adhoc_create(struct ipw_priv *priv, static void ipw_send_tgi_tx_key(struct ipw_priv *priv, int type, int index) { - struct ipw_tgi_tx_key *key; - struct host_cmd cmd = { - .cmd = IPW_CMD_TGI_TX_KEY, - .len = sizeof(*key) - }; + struct ipw_tgi_tx_key key; if (!(priv->ieee->sec.flags & (1 << index))) return; - key = (struct ipw_tgi_tx_key *)&cmd.param; - key->key_id = index; - memcpy(key->key, priv->ieee->sec.keys[index], SCM_TEMPORAL_KEY_LENGTH); - key->security_type = type; - key->station_index = 0; /* always 0 for BSS */ - key->flags = 0; + key.key_id = index; + memcpy(key.key, priv->ieee->sec.keys[index], SCM_TEMPORAL_KEY_LENGTH); + key.security_type = type; + key.station_index = 0; /* always 0 for BSS */ + key.flags = 0; /* 0 for new key; previous value of counter (after fatal error) */ - key->tx_counter[0] = 0; - key->tx_counter[1] = 0; + key.tx_counter[0] = 0; + key.tx_counter[1] = 0; - ipw_send_cmd(priv, &cmd); + ipw_send_cmd_pdu(priv, IPW_CMD_TGI_TX_KEY, sizeof(key), &key); } static void ipw_send_wep_keys(struct ipw_priv *priv, int type) { - struct ipw_wep_key *key; + struct ipw_wep_key key; int i; - struct host_cmd cmd = { - .cmd = IPW_CMD_WEP_KEY, - .len = sizeof(*key) - }; - key = (struct ipw_wep_key *)&cmd.param; - key->cmd_id = DINO_CMD_WEP_KEY; - key->seq_num = 0; + key.cmd_id = DINO_CMD_WEP_KEY; + key.seq_num = 0; /* Note: AES keys cannot be set for multiple times. * Only set it at the first time. */ for (i = 0; i < 4; i++) { - key->key_index = i | type; + key.key_index = i | type; if (!(priv->ieee->sec.flags & (1 << i))) { - key->key_size = 0; + key.key_size = 0; continue; } - key->key_size = priv->ieee->sec.key_sizes[i]; - memcpy(key->key, priv->ieee->sec.keys[i], key->key_size); + key.key_size = priv->ieee->sec.key_sizes[i]; + memcpy(key.key, priv->ieee->sec.keys[i], key.key_size); - ipw_send_cmd(priv, &cmd); + ipw_send_cmd_pdu(priv, IPW_CMD_WEP_KEY, sizeof(key), &key); } } @@ -5822,9 +5782,9 @@ static void ipw_adhoc_check(void *data) static void ipw_bg_adhoc_check(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_adhoc_check(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } #ifdef CONFIG_IPW2200_DEBUG @@ -5950,7 +5910,7 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, const struct ieee80211_geo *geo; int i; - geo = ipw_get_geo(priv->ieee); + geo = ieee80211_get_geo(priv->ieee); if (priv->ieee->freq_band & IEEE80211_52GHZ_BAND) { int start = channel_index; @@ -6010,7 +5970,7 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, channel_index++; scan->channels_list[channel_index] = channel; index = - ipw_channel_to_index(priv->ieee, channel); + ieee80211_channel_to_index(priv->ieee, channel); ipw_set_scan_type(scan, channel_index, geo->bg[index]. flags & @@ -6051,7 +6011,7 @@ static int ipw_request_scan(struct ipw_priv *priv) (priv->status & STATUS_EXIT_PENDING)) return 0; - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->status & STATUS_SCANNING) { IPW_DEBUG_HC("Concurrent scan requested. Ignoring.\n"); @@ -6092,7 +6052,7 @@ static int ipw_request_scan(struct ipw_priv *priv) u8 channel; u8 band = 0; - switch (ipw_is_valid_channel(priv->ieee, priv->channel)) { + switch (ieee80211_is_valid_channel(priv->ieee, priv->channel)) { case IEEE80211_52GHZ_BAND: band = (u8) (IPW_A_MODE << 6) | 1; channel = priv->channel; @@ -6159,16 +6119,16 @@ static int ipw_request_scan(struct ipw_priv *priv) queue_delayed_work(priv->workqueue, &priv->scan_check, IPW_SCAN_CHECK_WATCHDOG); done: - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } static void ipw_bg_abort_scan(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_abort_scan(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static int ipw_wpa_enable(struct ipw_priv *priv, int value) @@ -6193,6 +6153,9 @@ static int ipw_wpa_set_auth_algs(struct ipw_priv *priv, int value) } else if (value & IW_AUTH_ALG_OPEN_SYSTEM) { sec.auth_mode = WLAN_AUTH_OPEN; ieee->open_wep = 1; + } else if (value & IW_AUTH_ALG_LEAP) { + sec.auth_mode = WLAN_AUTH_LEAP; + ieee->open_wep = 1; } else return -EINVAL; @@ -6204,7 +6167,8 @@ static int ipw_wpa_set_auth_algs(struct ipw_priv *priv, int value) return ret; } -void ipw_wpa_assoc_frame(struct ipw_priv *priv, char *wpa_ie, int wpa_ie_len) +static void ipw_wpa_assoc_frame(struct ipw_priv *priv, char *wpa_ie, + int wpa_ie_len) { /* make sure WPA is enabled */ ipw_wpa_enable(priv, 1); @@ -6215,15 +6179,10 @@ void ipw_wpa_assoc_frame(struct ipw_priv *priv, char *wpa_ie, int wpa_ie_len) static int ipw_set_rsn_capa(struct ipw_priv *priv, char *capabilities, int length) { - struct host_cmd cmd = { - .cmd = IPW_CMD_RSN_CAPABILITIES, - .len = length, - }; - IPW_DEBUG_HC("HOST_CMD_RSN_CAPABILITIES\n"); - memcpy(cmd.param, capabilities, length); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_RSN_CAPABILITIES, length, + capabilities); } /* @@ -6244,7 +6203,7 @@ static int ipw_wx_set_genie(struct net_device *dev, (wrqu->data.length && extra == NULL)) return -EINVAL; - //down(&priv->sem); + //mutex_lock(&priv->mutex); //if (!ieee->wpa_enabled) { // err = -EOPNOTSUPP; @@ -6270,7 +6229,7 @@ static int ipw_wx_set_genie(struct net_device *dev, ipw_wpa_assoc_frame(priv, ieee->wpa_ie, ieee->wpa_ie_len); out: - //up(&priv->sem); + //mutex_unlock(&priv->mutex); return err; } @@ -6283,7 +6242,7 @@ static int ipw_wx_get_genie(struct net_device *dev, struct ieee80211_device *ieee = priv->ieee; int err = 0; - //down(&priv->sem); + //mutex_lock(&priv->mutex); //if (!ieee->wpa_enabled) { // err = -EOPNOTSUPP; @@ -6304,7 +6263,7 @@ static int ipw_wx_get_genie(struct net_device *dev, memcpy(extra, ieee->wpa_ie, ieee->wpa_ie_len); out: - //up(&priv->sem); + //mutex_unlock(&priv->mutex); return err; } @@ -6556,7 +6515,7 @@ static int ipw_wx_set_mlme(struct net_device *dev, * get the modulation type of the current network or * the card current mode */ -u8 ipw_qos_current_mode(struct ipw_priv * priv) +static u8 ipw_qos_current_mode(struct ipw_priv * priv) { u8 mode = 0; @@ -6964,12 +6923,12 @@ static void ipw_bg_qos_activate(void *data) if (priv == NULL) return; - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->status & STATUS_ASSOCIATED) ipw_qos_activate(priv, &(priv->assoc_network->qos_data)); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static int ipw_handle_probe_response(struct net_device *dev, @@ -7010,25 +6969,15 @@ static int ipw_handle_assoc_response(struct net_device *dev, static int ipw_send_qos_params_command(struct ipw_priv *priv, struct ieee80211_qos_parameters *qos_param) { - struct host_cmd cmd = { - .cmd = IPW_CMD_QOS_PARAMETERS, - .len = (sizeof(struct ieee80211_qos_parameters) * 3) - }; - - memcpy(cmd.param, qos_param, sizeof(*qos_param) * 3); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_QOS_PARAMETERS, + sizeof(*qos_param) * 3, qos_param); } static int ipw_send_qos_info_command(struct ipw_priv *priv, struct ieee80211_qos_information_element *qos_param) { - struct host_cmd cmd = { - .cmd = IPW_CMD_WME_INFO, - .len = sizeof(*qos_param) - }; - - memcpy(cmd.param, qos_param, sizeof(*qos_param)); - return ipw_send_cmd(priv, &cmd); + return ipw_send_cmd_pdu(priv, IPW_CMD_WME_INFO, sizeof(*qos_param), + qos_param); } #endif /* CONFIG_IPW_QOS */ @@ -7052,19 +7001,21 @@ static int ipw_associate_network(struct ipw_priv *priv, memset(&priv->assoc_request, 0, sizeof(priv->assoc_request)); priv->assoc_request.channel = network->channel; + priv->assoc_request.auth_key = 0; + if ((priv->capability & CAP_PRIVACY_ON) && - (priv->capability & CAP_SHARED_KEY)) { + (priv->ieee->sec.auth_mode == WLAN_AUTH_SHARED_KEY)) { priv->assoc_request.auth_type = AUTH_SHARED_KEY; priv->assoc_request.auth_key = priv->ieee->sec.active_key; - if ((priv->capability & CAP_PRIVACY_ON) && - (priv->ieee->sec.level == SEC_LEVEL_1) && - !(priv->ieee->host_encrypt || priv->ieee->host_decrypt)) + if (priv->ieee->sec.level == SEC_LEVEL_1) ipw_send_wep_keys(priv, DCW_WEP_KEY_SEC_TYPE_WEP); - } else { + + } else if ((priv->capability & CAP_PRIVACY_ON) && + (priv->ieee->sec.auth_mode == WLAN_AUTH_LEAP)) + priv->assoc_request.auth_type = AUTH_LEAP; + else priv->assoc_request.auth_type = AUTH_OPEN; - priv->assoc_request.auth_key = 0; - } if (priv->ieee->wpa_ie_len) { priv->assoc_request.policy_support = 0x02; /* RSN active */ @@ -7278,9 +7229,9 @@ static void ipw_roam(void *data) static void ipw_bg_roam(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_roam(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static int ipw_associate(void *data) @@ -7375,9 +7326,9 @@ static int ipw_associate(void *data) static void ipw_bg_associate(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_associate(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv, @@ -7811,12 +7762,10 @@ static void ipw_rx(struct ipw_priv *priv) while (i != r) { rxb = priv->rxq->queue[i]; -#ifdef CONFIG_IPW2200_DEBUG if (unlikely(rxb == NULL)) { printk(KERN_CRIT "Queue not allocated!\n"); break; } -#endif priv->rxq->queue[i] = NULL; pci_dma_sync_single_for_cpu(priv->pci_dev, rxb->dma_addr, @@ -7835,7 +7784,8 @@ static void ipw_rx(struct ipw_priv *priv) le16_to_cpu(pkt->u.frame.rssi_dbm) - IPW_RSSI_TO_DBM, .signal = - le16_to_cpu(pkt->u.frame.signal), + le16_to_cpu(pkt->u.frame.rssi_dbm) - + IPW_RSSI_TO_DBM + 0x100, .noise = le16_to_cpu(pkt->u.frame.noise), .rate = pkt->u.frame.rate, @@ -7899,7 +7849,8 @@ static void ipw_rx(struct ipw_priv *priv) le16_to_cpu(pkt->u.frame.length)); if (le16_to_cpu(pkt->u.frame.length) < - frame_hdr_len(header)) { + ieee80211_get_hdrlen(le16_to_cpu( + header->frame_ctl))) { IPW_DEBUG_DROP ("Received packet is too small. " "Dropping.\n"); @@ -7989,7 +7940,14 @@ static void ipw_rx(struct ipw_priv *priv) #define DEFAULT_SHORT_RETRY_LIMIT 7U #define DEFAULT_LONG_RETRY_LIMIT 4U -static int ipw_sw_reset(struct ipw_priv *priv, int init) +/** + * ipw_sw_reset + * @option: options to control different reset behaviour + * 0 = reset everything except the 'disable' module_param + * 1 = reset everything and print out driver info (for probe only) + * 2 = reset everything + */ +static int ipw_sw_reset(struct ipw_priv *priv, int option) { int band, modulation; int old_mode = priv->ieee->iw_mode; @@ -8016,7 +7974,7 @@ static int ipw_sw_reset(struct ipw_priv *priv, int init) priv->essid_len = 0; memset(priv->essid, 0, IW_ESSID_MAX_SIZE); - if (disable) { + if (disable && option) { priv->status |= STATUS_RF_KILL_SW; IPW_DEBUG_INFO("Radio disabled.\n"); } @@ -8068,7 +8026,7 @@ static int ipw_sw_reset(struct ipw_priv *priv, int init) if ((priv->pci_dev->device == 0x4223) || (priv->pci_dev->device == 0x4224)) { - if (init) + if (option == 1) printk(KERN_INFO DRV_NAME ": Detected Intel PRO/Wireless 2915ABG Network " "Connection\n"); @@ -8079,7 +8037,7 @@ static int ipw_sw_reset(struct ipw_priv *priv, int init) priv->adapter = IPW_2915ABG; priv->ieee->mode = IEEE_A | IEEE_G | IEEE_B; } else { - if (init) + if (option == 1) printk(KERN_INFO DRV_NAME ": Detected Intel PRO/Wireless 2200BG Network " "Connection\n"); @@ -8126,7 +8084,7 @@ static int ipw_wx_get_name(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->status & STATUS_RF_KILL_MASK) strcpy(wrqu->name, "radio off"); else if (!(priv->status & STATUS_ASSOCIATED)) @@ -8135,7 +8093,7 @@ static int ipw_wx_get_name(struct net_device *dev, snprintf(wrqu->name, IFNAMSIZ, "IEEE 802.11%c", ipw_modes[priv->assoc_request.ieee_mode]); IPW_DEBUG_WX("Name: %s\n", wrqu->name); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8196,7 +8154,7 @@ static int ipw_wx_set_freq(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - const struct ieee80211_geo *geo = ipw_get_geo(priv->ieee); + const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); struct iw_freq *fwrq = &wrqu->freq; int ret = 0, i; u8 channel, flags; @@ -8204,24 +8162,24 @@ static int ipw_wx_set_freq(struct net_device *dev, if (fwrq->m == 0) { IPW_DEBUG_WX("SET Freq/Channel -> any\n"); - down(&priv->sem); + mutex_lock(&priv->mutex); ret = ipw_set_channel(priv, 0); - up(&priv->sem); + mutex_unlock(&priv->mutex); return ret; } /* if setting by freq convert to channel */ if (fwrq->e == 1) { - channel = ipw_freq_to_channel(priv->ieee, fwrq->m); + channel = ieee80211_freq_to_channel(priv->ieee, fwrq->m); if (channel == 0) return -EINVAL; } else channel = fwrq->m; - if (!(band = ipw_is_valid_channel(priv->ieee, channel))) + if (!(band = ieee80211_is_valid_channel(priv->ieee, channel))) return -EINVAL; if (priv->ieee->iw_mode == IW_MODE_ADHOC) { - i = ipw_channel_to_index(priv->ieee, channel); + i = ieee80211_channel_to_index(priv->ieee, channel); if (i == -1) return -EINVAL; @@ -8234,9 +8192,9 @@ static int ipw_wx_set_freq(struct net_device *dev, } IPW_DEBUG_WX("SET Freq/Channel -> %d \n", fwrq->m); - down(&priv->sem); + mutex_lock(&priv->mutex); ret = ipw_set_channel(priv, channel); - up(&priv->sem); + mutex_unlock(&priv->mutex); return ret; } @@ -8250,14 +8208,14 @@ static int ipw_wx_get_freq(struct net_device *dev, /* If we are associated, trying to associate, or have a statically * configured CHANNEL then return that; otherwise return ANY */ - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->config & CFG_STATIC_CHANNEL || priv->status & (STATUS_ASSOCIATING | STATUS_ASSOCIATED)) wrqu->freq.m = priv->channel; else wrqu->freq.m = 0; - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET Freq/Channel -> %d \n", priv->channel); return 0; } @@ -8287,7 +8245,7 @@ static int ipw_wx_set_mode(struct net_device *dev, if (wrqu->mode == priv->ieee->iw_mode) return 0; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_sw_reset(priv, 0); @@ -8310,7 +8268,7 @@ static int ipw_wx_set_mode(struct net_device *dev, priv->ieee->iw_mode = wrqu->mode; queue_work(priv->workqueue, &priv->adapter_restart); - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } @@ -8319,10 +8277,10 @@ static int ipw_wx_get_mode(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->mode = priv->ieee->iw_mode; IPW_DEBUG_WX("Get MODE -> %d\n", wrqu->mode); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8349,7 +8307,7 @@ static int ipw_wx_get_range(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); struct iw_range *range = (struct iw_range *)extra; - const struct ieee80211_geo *geo = ipw_get_geo(priv->ieee); + const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); int i = 0, j; wrqu->data.length = sizeof(*range); @@ -8361,7 +8319,7 @@ static int ipw_wx_get_range(struct net_device *dev, range->max_qual.qual = 100; /* TODO: Find real max RSSI and stick here */ range->max_qual.level = 0; - range->max_qual.noise = priv->ieee->worst_rssi + 0x100; + range->max_qual.noise = 0; range->max_qual.updated = 7; /* Updated all three */ range->avg_qual.qual = 70; @@ -8369,7 +8327,7 @@ static int ipw_wx_get_range(struct net_device *dev, range->avg_qual.level = 0; /* FIXME to real average level */ range->avg_qual.noise = 0; range->avg_qual.updated = 7; /* Updated all three */ - down(&priv->sem); + mutex_lock(&priv->mutex); range->num_bitrates = min(priv->rates.num_rates, (u8) IW_MAX_BITRATES); for (i = 0; i < range->num_bitrates; i++) @@ -8387,31 +8345,39 @@ static int ipw_wx_get_range(struct net_device *dev, /* Set the Wireless Extension versions */ range->we_version_compiled = WIRELESS_EXT; - range->we_version_source = 16; + range->we_version_source = 18; i = 0; if (priv->ieee->mode & (IEEE_B | IEEE_G)) { - for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES; - i++, j++) { + for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES; j++) { + if ((priv->ieee->iw_mode == IW_MODE_ADHOC) && + (geo->bg[j].flags & IEEE80211_CH_PASSIVE_ONLY)) + continue; + range->freq[i].i = geo->bg[j].channel; range->freq[i].m = geo->bg[j].freq * 100000; range->freq[i].e = 1; + i++; } } if (priv->ieee->mode & IEEE_A) { - for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES; - i++, j++) { + for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES; j++) { + if ((priv->ieee->iw_mode == IW_MODE_ADHOC) && + (geo->a[j].flags & IEEE80211_CH_PASSIVE_ONLY)) + continue; + range->freq[i].i = geo->a[j].channel; range->freq[i].m = geo->a[j].freq * 100000; range->freq[i].e = 1; + i++; } } range->num_channels = i; range->num_frequency = i; - up(&priv->sem); + mutex_unlock(&priv->mutex); /* Event capability (kernel + driver) */ range->event_capa[0] = (IW_EVENT_CAPA_K_0 | @@ -8419,6 +8385,9 @@ static int ipw_wx_get_range(struct net_device *dev, IW_EVENT_CAPA_MASK(SIOCGIWAP)); range->event_capa[1] = IW_EVENT_CAPA_K_1; + range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | + IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; + IPW_DEBUG_WX("GET Range\n"); return 0; } @@ -8438,7 +8407,7 @@ static int ipw_wx_set_wap(struct net_device *dev, if (wrqu->ap_addr.sa_family != ARPHRD_ETHER) return -EINVAL; - down(&priv->sem); + mutex_lock(&priv->mutex); if (!memcmp(any, wrqu->ap_addr.sa_data, ETH_ALEN) || !memcmp(off, wrqu->ap_addr.sa_data, ETH_ALEN)) { /* we disable mandatory BSSID association */ @@ -8447,14 +8416,14 @@ static int ipw_wx_set_wap(struct net_device *dev, IPW_DEBUG_ASSOC("Attempting to associate with new " "parameters.\n"); ipw_associate(priv); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } priv->config |= CFG_STATIC_BSSID; if (!memcmp(priv->bssid, wrqu->ap_addr.sa_data, ETH_ALEN)) { IPW_DEBUG_WX("BSSID set to current BSSID.\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8468,7 +8437,7 @@ static int ipw_wx_set_wap(struct net_device *dev, if (!ipw_disassociate(priv)) ipw_associate(priv); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8479,7 +8448,7 @@ static int ipw_wx_get_wap(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); /* If we are associated, trying to associate, or have a statically * configured BSSID then return that; otherwise return ANY */ - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->config & CFG_STATIC_BSSID || priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)) { wrqu->ap_addr.sa_family = ARPHRD_ETHER; @@ -8489,7 +8458,7 @@ static int ipw_wx_get_wap(struct net_device *dev, IPW_DEBUG_WX("Getting WAP BSSID: " MAC_FMT "\n", MAC_ARG(wrqu->ap_addr.sa_data)); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8500,7 +8469,7 @@ static int ipw_wx_set_essid(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); char *essid = ""; /* ANY */ int length = 0; - down(&priv->sem); + mutex_lock(&priv->mutex); if (wrqu->essid.flags && wrqu->essid.length) { length = wrqu->essid.length - 1; essid = extra; @@ -8515,7 +8484,7 @@ static int ipw_wx_set_essid(struct net_device *dev, priv->config &= ~CFG_STATIC_ESSID; ipw_associate(priv); } - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8525,7 +8494,7 @@ static int ipw_wx_set_essid(struct net_device *dev, if (priv->essid_len == length && !memcmp(priv->essid, extra, length)) { IPW_DEBUG_WX("ESSID set to current ESSID.\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8540,7 +8509,7 @@ static int ipw_wx_set_essid(struct net_device *dev, if (!ipw_disassociate(priv)) ipw_associate(priv); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8552,7 +8521,7 @@ static int ipw_wx_get_essid(struct net_device *dev, /* If we are associated, trying to associate, or have a statically * configured ESSID then return that; otherwise return ANY */ - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->config & CFG_STATIC_ESSID || priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)) { IPW_DEBUG_WX("Getting essid: '%s'\n", @@ -8565,7 +8534,7 @@ static int ipw_wx_get_essid(struct net_device *dev, wrqu->essid.length = 0; wrqu->essid.flags = 0; /* active */ } - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8578,12 +8547,12 @@ static int ipw_wx_set_nick(struct net_device *dev, IPW_DEBUG_WX("Setting nick to '%s'\n", extra); if (wrqu->data.length > IW_ESSID_MAX_SIZE) return -E2BIG; - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->data.length = min((size_t) wrqu->data.length, sizeof(priv->nick)); memset(priv->nick, 0, sizeof(priv->nick)); memcpy(priv->nick, extra, wrqu->data.length); IPW_DEBUG_TRACE("<<\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8594,11 +8563,57 @@ static int ipw_wx_get_nick(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); IPW_DEBUG_WX("Getting nick\n"); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->data.length = strlen(priv->nick) + 1; memcpy(extra, priv->nick, wrqu->data.length); wrqu->data.flags = 1; /* active */ - up(&priv->sem); + mutex_unlock(&priv->mutex); + return 0; +} + +static int ipw_wx_set_sens(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct ipw_priv *priv = ieee80211_priv(dev); + int err = 0; + + IPW_DEBUG_WX("Setting roaming threshold to %d\n", wrqu->sens.value); + IPW_DEBUG_WX("Setting disassociate threshold to %d\n", 3*wrqu->sens.value); + mutex_lock(&priv->mutex); + + if (wrqu->sens.fixed == 0) + { + priv->roaming_threshold = IPW_MB_ROAMING_THRESHOLD_DEFAULT; + priv->disassociate_threshold = IPW_MB_DISASSOCIATE_THRESHOLD_DEFAULT; + goto out; + } + if ((wrqu->sens.value > IPW_MB_ROAMING_THRESHOLD_MAX) || + (wrqu->sens.value < IPW_MB_ROAMING_THRESHOLD_MIN)) { + err = -EINVAL; + goto out; + } + + priv->roaming_threshold = wrqu->sens.value; + priv->disassociate_threshold = 3*wrqu->sens.value; + out: + mutex_unlock(&priv->mutex); + return err; +} + +static int ipw_wx_get_sens(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct ipw_priv *priv = ieee80211_priv(dev); + mutex_lock(&priv->mutex); + wrqu->sens.fixed = 1; + wrqu->sens.value = priv->roaming_threshold; + mutex_unlock(&priv->mutex); + + IPW_DEBUG_WX("GET roaming threshold -> %s %d \n", + wrqu->power.disabled ? "OFF" : "ON", wrqu->power.value); + return 0; } @@ -8691,7 +8706,7 @@ static int ipw_wx_set_rate(struct net_device *dev, apply: IPW_DEBUG_WX("Setting rate mask to 0x%08X [%s]\n", mask, fixed ? "fixed" : "sub-rates"); - down(&priv->sem); + mutex_lock(&priv->mutex); if (mask == IEEE80211_DEFAULT_RATES_MASK) { priv->config &= ~CFG_FIXED_RATE; ipw_set_fixed_rate(priv, priv->ieee->mode); @@ -8700,7 +8715,7 @@ static int ipw_wx_set_rate(struct net_device *dev, if (priv->rates_mask == mask) { IPW_DEBUG_WX("Mask set to current mask.\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8711,7 +8726,7 @@ static int ipw_wx_set_rate(struct net_device *dev, if (!ipw_disassociate(priv)) ipw_associate(priv); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -8720,9 +8735,9 @@ static int ipw_wx_get_rate(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->bitrate.value = priv->last_rate; - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET Rate -> %d \n", wrqu->bitrate.value); return 0; } @@ -8732,20 +8747,20 @@ static int ipw_wx_set_rts(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); if (wrqu->rts.disabled) priv->rts_threshold = DEFAULT_RTS_THRESHOLD; else { if (wrqu->rts.value < MIN_RTS_THRESHOLD || wrqu->rts.value > MAX_RTS_THRESHOLD) { - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EINVAL; } priv->rts_threshold = wrqu->rts.value; } ipw_send_rts_threshold(priv, priv->rts_threshold); - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("SET RTS Threshold -> %d \n", priv->rts_threshold); return 0; } @@ -8755,11 +8770,11 @@ static int ipw_wx_get_rts(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->rts.value = priv->rts_threshold; wrqu->rts.fixed = 0; /* no auto select */ wrqu->rts.disabled = (wrqu->rts.value == DEFAULT_RTS_THRESHOLD); - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET RTS Threshold -> %d \n", wrqu->rts.value); return 0; } @@ -8771,7 +8786,7 @@ static int ipw_wx_set_txpow(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); int err = 0; - down(&priv->sem); + mutex_lock(&priv->mutex); if (ipw_radio_kill_sw(priv, wrqu->power.disabled)) { err = -EINPROGRESS; goto out; @@ -8794,7 +8809,7 @@ static int ipw_wx_set_txpow(struct net_device *dev, priv->tx_power = wrqu->power.value; err = ipw_set_tx_power(priv); out: - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } @@ -8803,12 +8818,12 @@ static int ipw_wx_get_txpow(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->power.value = priv->tx_power; wrqu->power.fixed = 1; wrqu->power.flags = IW_TXPOW_DBM; wrqu->power.disabled = (priv->status & STATUS_RF_KILL_MASK) ? 1 : 0; - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET TX Power -> %s %d \n", wrqu->power.disabled ? "OFF" : "ON", wrqu->power.value); @@ -8821,13 +8836,13 @@ static int ipw_wx_set_frag(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); if (wrqu->frag.disabled) priv->ieee->fts = DEFAULT_FTS; else { if (wrqu->frag.value < MIN_FRAG_THRESHOLD || wrqu->frag.value > MAX_FRAG_THRESHOLD) { - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EINVAL; } @@ -8835,7 +8850,7 @@ static int ipw_wx_set_frag(struct net_device *dev, } ipw_send_frag_threshold(priv, wrqu->frag.value); - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("SET Frag Threshold -> %d \n", wrqu->frag.value); return 0; } @@ -8845,11 +8860,11 @@ static int ipw_wx_get_frag(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->frag.value = priv->ieee->fts; wrqu->frag.fixed = 0; /* no auto select */ wrqu->frag.disabled = (wrqu->frag.value == DEFAULT_FTS); - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET Frag Threshold -> %d \n", wrqu->frag.value); return 0; @@ -8870,7 +8885,7 @@ static int ipw_wx_set_retry(struct net_device *dev, if (wrqu->retry.value < 0 || wrqu->retry.value > 255) return -EINVAL; - down(&priv->sem); + mutex_lock(&priv->mutex); if (wrqu->retry.flags & IW_RETRY_MIN) priv->short_retry_limit = (u8) wrqu->retry.value; else if (wrqu->retry.flags & IW_RETRY_MAX) @@ -8882,7 +8897,7 @@ static int ipw_wx_set_retry(struct net_device *dev, ipw_send_retry_limit(priv, priv->short_retry_limit, priv->long_retry_limit); - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("SET retry limit -> short:%d long:%d\n", priv->short_retry_limit, priv->long_retry_limit); return 0; @@ -8894,11 +8909,11 @@ static int ipw_wx_get_retry(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); wrqu->retry.disabled = 0; if ((wrqu->retry.flags & IW_RETRY_TYPE) == IW_RETRY_LIFETIME) { - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EINVAL; } @@ -8912,7 +8927,7 @@ static int ipw_wx_get_retry(struct net_device *dev, wrqu->retry.flags = IW_RETRY_LIMIT; wrqu->retry.value = priv->short_retry_limit; } - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET retry -> %d \n", wrqu->retry.value); @@ -8929,7 +8944,7 @@ static int ipw_request_direct_scan(struct ipw_priv *priv, char *essid, (priv->status & STATUS_EXIT_PENDING)) return 0; - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->status & STATUS_RF_KILL_MASK) { IPW_DEBUG_HC("Aborting scan due to RF kill activation\n"); @@ -8981,7 +8996,7 @@ static int ipw_request_direct_scan(struct ipw_priv *priv, char *essid, priv->status |= STATUS_SCANNING; done: - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } @@ -9024,7 +9039,7 @@ static int ipw_wx_set_encode(struct net_device *dev, int ret; u32 cap = priv->capability; - down(&priv->sem); + mutex_lock(&priv->mutex); ret = ieee80211_wx_set_encode(priv->ieee, info, wrqu, key); /* In IBSS mode, we need to notify the firmware to update @@ -9034,7 +9049,7 @@ static int ipw_wx_set_encode(struct net_device *dev, priv->status & STATUS_ASSOCIATED) ipw_disassociate(priv); - up(&priv->sem); + mutex_unlock(&priv->mutex); return ret; } @@ -9052,17 +9067,17 @@ static int ipw_wx_set_power(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); int err; - down(&priv->sem); + mutex_lock(&priv->mutex); if (wrqu->power.disabled) { priv->power_mode = IPW_POWER_LEVEL(priv->power_mode); err = ipw_send_power_mode(priv, IPW_POWER_MODE_CAM); if (err) { IPW_DEBUG_WX("failed setting power mode.\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } IPW_DEBUG_WX("SET Power Management Mode -> off\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9074,7 +9089,7 @@ static int ipw_wx_set_power(struct net_device *dev, default: /* Otherwise we don't support it */ IPW_DEBUG_WX("SET PM Mode: %X not supported.\n", wrqu->power.flags); - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EOPNOTSUPP; } @@ -9087,12 +9102,12 @@ static int ipw_wx_set_power(struct net_device *dev, err = ipw_send_power_mode(priv, IPW_POWER_LEVEL(priv->power_mode)); if (err) { IPW_DEBUG_WX("failed setting power mode.\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } IPW_DEBUG_WX("SET Power Management Mode -> 0x%02X\n", priv->power_mode); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9101,13 +9116,13 @@ static int ipw_wx_get_power(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); if (!(priv->power_mode & IPW_POWER_ENABLED)) wrqu->power.disabled = 1; else wrqu->power.disabled = 0; - up(&priv->sem); + mutex_unlock(&priv->mutex); IPW_DEBUG_WX("GET Power Management Mode -> %02X\n", priv->power_mode); return 0; @@ -9120,7 +9135,7 @@ static int ipw_wx_set_powermode(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); int mode = *(int *)extra; int err; - down(&priv->sem); + mutex_lock(&priv->mutex); if ((mode < 1) || (mode > IPW_POWER_LIMIT)) { mode = IPW_POWER_AC; priv->power_mode = mode; @@ -9133,11 +9148,11 @@ static int ipw_wx_set_powermode(struct net_device *dev, if (err) { IPW_DEBUG_WX("failed setting power mode.\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return err; } } - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9186,7 +9201,7 @@ static int ipw_wx_set_wireless_mode(struct net_device *dev, IPW_WARNING("Attempt to set invalid wireless mode: %d\n", mode); return -EINVAL; } - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->adapter == IPW_2915ABG) { priv->ieee->abg_true = 1; if (mode & IEEE_A) { @@ -9198,7 +9213,7 @@ static int ipw_wx_set_wireless_mode(struct net_device *dev, if (mode & IEEE_A) { IPW_WARNING("Attempt to set 2200BG into " "802.11a mode\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EINVAL; } @@ -9235,7 +9250,7 @@ static int ipw_wx_set_wireless_mode(struct net_device *dev, IPW_DEBUG_WX("PRIV SET MODE: %c%c%c\n", mode & IEEE_A ? 'a' : '.', mode & IEEE_B ? 'b' : '.', mode & IEEE_G ? 'g' : '.'); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9244,7 +9259,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); switch (priv->ieee->mode) { case IEEE_A: strncpy(extra, "802.11a (1)", MAX_WX_STRING); @@ -9275,7 +9290,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev, IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra); wrqu->data.length = strlen(extra) + 1; - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9286,7 +9301,7 @@ static int ipw_wx_set_preamble(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); int mode = *(int *)extra; - down(&priv->sem); + mutex_lock(&priv->mutex); /* Switching from SHORT -> LONG requires a disassociation */ if (mode == 1) { if (!(priv->config & CFG_PREAMBLE_LONG)) { @@ -9305,11 +9320,11 @@ static int ipw_wx_set_preamble(struct net_device *dev, priv->config &= ~CFG_PREAMBLE_LONG; goto done; } - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EINVAL; done: - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9318,12 +9333,12 @@ static int ipw_wx_get_preamble(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); if (priv->config & CFG_PREAMBLE_LONG) snprintf(wrqu->name, IFNAMSIZ, "long (1)"); else snprintf(wrqu->name, IFNAMSIZ, "auto (0)"); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9335,7 +9350,7 @@ static int ipw_wx_set_monitor(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); int *parms = (int *)extra; int enable = (parms[0] > 0); - down(&priv->sem); + mutex_lock(&priv->mutex); IPW_DEBUG_WX("SET MONITOR: %d %d\n", enable, parms[1]); if (enable) { if (priv->ieee->iw_mode != IW_MODE_MONITOR) { @@ -9350,13 +9365,13 @@ static int ipw_wx_set_monitor(struct net_device *dev, ipw_set_channel(priv, parms[1]); } else { if (priv->ieee->iw_mode != IW_MODE_MONITOR) { - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } priv->net_dev->type = ARPHRD_ETHER; queue_work(priv->workqueue, &priv->adapter_restart); } - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9386,9 +9401,9 @@ static int ipw_wx_sw_reset(struct net_device *dev, IPW_DEBUG_WX("SW_RESET\n"); - down(&priv->sem); + mutex_lock(&priv->mutex); - ret = ipw_sw_reset(priv, 0); + ret = ipw_sw_reset(priv, 2); if (!ret) { free_firmware(); ipw_adapter_restart(priv); @@ -9398,9 +9413,9 @@ static int ipw_wx_sw_reset(struct net_device *dev, * module parameter, so take appropriate action */ ipw_radio_kill_sw(priv, priv->status & STATUS_RF_KILL_SW); - up(&priv->sem); + mutex_unlock(&priv->mutex); ieee80211_wx_set_encode(priv->ieee, info, &wrqu_sec, NULL); - down(&priv->sem); + mutex_lock(&priv->mutex); if (!(priv->status & STATUS_RF_KILL_MASK)) { /* Configuration likely changed -- force [re]association */ @@ -9410,7 +9425,7 @@ static int ipw_wx_sw_reset(struct net_device *dev, ipw_associate(priv); } - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9423,6 +9438,8 @@ static iw_handler ipw_wx_handlers[] = { IW_IOCTL(SIOCGIWFREQ) = ipw_wx_get_freq, IW_IOCTL(SIOCSIWMODE) = ipw_wx_set_mode, IW_IOCTL(SIOCGIWMODE) = ipw_wx_get_mode, + IW_IOCTL(SIOCSIWSENS) = ipw_wx_set_sens, + IW_IOCTL(SIOCGIWSENS) = ipw_wx_get_sens, IW_IOCTL(SIOCGIWRANGE) = ipw_wx_get_range, IW_IOCTL(SIOCSIWAP) = ipw_wx_set_wap, IW_IOCTL(SIOCGIWAP) = ipw_wx_get_wap, @@ -9568,7 +9585,7 @@ static struct iw_statistics *ipw_get_wireless_stats(struct net_device *dev) wstats->qual.level = average_value(&priv->average_rssi); wstats->qual.noise = average_value(&priv->average_noise); wstats->qual.updated = IW_QUAL_QUAL_UPDATED | IW_QUAL_LEVEL_UPDATED | - IW_QUAL_NOISE_UPDATED; + IW_QUAL_NOISE_UPDATED | IW_QUAL_DBM; wstats->miss.beacon = average_value(&priv->average_missed_beacons); wstats->discard.retries = priv->last_tx_failures; @@ -9586,7 +9603,7 @@ static struct iw_statistics *ipw_get_wireless_stats(struct net_device *dev) static void init_sys_config(struct ipw_sys_config *sys_config) { memset(sys_config, 0, sizeof(struct ipw_sys_config)); - sys_config->bt_coexistence = 1; /* We may need to look into prvStaBtConfig */ + sys_config->bt_coexistence = 0; sys_config->answer_broadcast_ssid_probe = 0; sys_config->accept_all_data_frames = 0; sys_config->accept_non_directed_frames = 1; @@ -9594,12 +9611,13 @@ static void init_sys_config(struct ipw_sys_config *sys_config) sys_config->disable_unicast_decryption = 1; sys_config->exclude_multicast_unencrypted = 0; sys_config->disable_multicast_decryption = 1; - sys_config->antenna_diversity = CFG_SYS_ANTENNA_BOTH; + sys_config->antenna_diversity = CFG_SYS_ANTENNA_SLOW_DIV; sys_config->pass_crc_to_host = 0; /* TODO: See if 1 gives us FCS */ sys_config->dot11g_auto_detection = 0; sys_config->enable_cts_to_self = 0; sys_config->bt_coexist_collision_thr = 0; sys_config->pass_noise_stats_to_host = 1; //1 -- fix for 256 + sys_config->silence_threshold = 0x1e; } static int ipw_net_open(struct net_device *dev) @@ -9607,11 +9625,11 @@ static int ipw_net_open(struct net_device *dev) struct ipw_priv *priv = ieee80211_priv(dev); IPW_DEBUG_INFO("dev->open\n"); /* we should be verifying the device is ready to be opened */ - down(&priv->sem); + mutex_lock(&priv->mutex); if (!(priv->status & STATUS_RF_KILL_MASK) && (priv->status & STATUS_ASSOCIATED)) netif_start_queue(dev); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9647,11 +9665,6 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, u16 remaining_bytes; int fc; - /* If there isn't room in the queue, we return busy and let the - * network stack requeue the packet for us */ - if (ipw_queue_space(q) < q->high_mark) - return NETDEV_TX_BUSY; - switch (priv->ieee->iw_mode) { case IW_MODE_ADHOC: hdr_len = IEEE80211_3ADDR_LEN; @@ -9817,6 +9830,9 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, q->first_empty = ipw_queue_inc_wrap(q->first_empty, q->n_bd); ipw_write32(priv, q->reg_w, q->first_empty); + if (ipw_queue_space(q) < q->high_mark) + netif_stop_queue(priv->net_dev); + return NETDEV_TX_OK; drop: @@ -9890,13 +9906,13 @@ static int ipw_net_set_mac_address(struct net_device *dev, void *p) struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - down(&priv->sem); + mutex_lock(&priv->mutex); priv->config |= CFG_CUSTOM_MAC; memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN); printk(KERN_INFO "%s: Setting MAC to " MAC_FMT "\n", priv->net_dev->name, MAC_ARG(priv->mac_addr)); queue_work(priv->workqueue, &priv->adapter_restart); - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -9940,9 +9956,9 @@ static int ipw_ethtool_get_eeprom(struct net_device *dev, if (eeprom->offset + eeprom->len > IPW_EEPROM_IMAGE_SIZE) return -EINVAL; - down(&p->sem); + mutex_lock(&p->mutex); memcpy(bytes, &p->eeprom[eeprom->offset], eeprom->len); - up(&p->sem); + mutex_unlock(&p->mutex); return 0; } @@ -9954,12 +9970,11 @@ static int ipw_ethtool_set_eeprom(struct net_device *dev, if (eeprom->offset + eeprom->len > IPW_EEPROM_IMAGE_SIZE) return -EINVAL; - down(&p->sem); + mutex_lock(&p->mutex); memcpy(&p->eeprom[eeprom->offset], bytes, eeprom->len); - for (i = IPW_EEPROM_DATA; - i < IPW_EEPROM_DATA + IPW_EEPROM_IMAGE_SIZE; i++) - ipw_write8(p, i, p->eeprom[i]); - up(&p->sem); + for (i = 0; i < IPW_EEPROM_IMAGE_SIZE; i++) + ipw_write8(p, i + IPW_EEPROM_DATA, p->eeprom[i]); + mutex_unlock(&p->mutex); return 0; } @@ -10054,12 +10069,12 @@ static void ipw_rf_kill(void *adapter) static void ipw_bg_rf_kill(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_rf_kill(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } -void ipw_link_up(struct ipw_priv *priv) +static void ipw_link_up(struct ipw_priv *priv) { priv->last_seq_num = -1; priv->last_frag_num = -1; @@ -10089,12 +10104,12 @@ void ipw_link_up(struct ipw_priv *priv) static void ipw_bg_link_up(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_link_up(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } -void ipw_link_down(struct ipw_priv *priv) +static void ipw_link_down(struct ipw_priv *priv) { ipw_led_link_down(priv); netif_carrier_off(priv->net_dev); @@ -10117,9 +10132,9 @@ void ipw_link_down(struct ipw_priv *priv) static void ipw_bg_link_down(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_link_down(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static int ipw_setup_deferred_work(struct ipw_priv *priv) @@ -10292,6 +10307,20 @@ static int ipw_config(struct ipw_priv *priv) /* set basic system config settings */ init_sys_config(&priv->sys_config); + + /* Support Bluetooth if we have BT h/w on board, and user wants to. + * Does not support BT priority yet (don't abort or defer our Tx) */ + if (bt_coexist) { + unsigned char bt_caps = priv->eeprom[EEPROM_SKU_CAPABILITY]; + + if (bt_caps & EEPROM_SKU_CAP_BT_CHANNEL_SIG) + priv->sys_config.bt_coexistence + |= CFG_BT_COEXISTENCE_SIGNAL_CHNL; + if (bt_caps & EEPROM_SKU_CAP_BT_OOB) + priv->sys_config.bt_coexistence + |= CFG_BT_COEXISTENCE_OOB; + } + if (priv->ieee->iw_mode == IW_MODE_ADHOC) priv->sys_config.answer_broadcast_ssid_probe = 1; else @@ -10349,6 +10378,9 @@ static int ipw_config(struct ipw_priv *priv) * not intended for resale of the above mentioned Intel adapters has * not been tested. * + * Remember to update the table in README.ipw2200 when changing this + * table. + * */ static const struct ieee80211_geo ipw_geos[] = { { /* Restricted */ @@ -10596,96 +10628,6 @@ static const struct ieee80211_geo ipw_geos[] = { } }; -/* GEO code borrowed from ieee80211_geo.c */ -static int ipw_is_valid_channel(struct ieee80211_device *ieee, u8 channel) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - /* NOTE: If G mode is currently supported but - * this is a B only channel, we don't see it - * as valid. */ - if ((ieee->geo.bg[i].channel == channel) && - (!(ieee->mode & IEEE_G) || - !(ieee->geo.bg[i].flags & IEEE80211_CH_B_ONLY))) - return IEEE80211_24GHZ_BAND; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].channel == channel) - return IEEE80211_52GHZ_BAND; - - return 0; -} - -static int ipw_channel_to_index(struct ieee80211_device *ieee, u8 channel) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - if (ieee->geo.bg[i].channel == channel) - return i; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].channel == channel) - return i; - - return -1; -} - -static u8 ipw_freq_to_channel(struct ieee80211_device *ieee, u32 freq) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); - - freq /= 100000; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - if (ieee->geo.bg[i].freq == freq) - return ieee->geo.bg[i].channel; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].freq == freq) - return ieee->geo.a[i].channel; - - return 0; -} - -static int ipw_set_geo(struct ieee80211_device *ieee, - const struct ieee80211_geo *geo) -{ - memcpy(ieee->geo.name, geo->name, 3); - ieee->geo.name[3] = '\0'; - ieee->geo.bg_channels = geo->bg_channels; - ieee->geo.a_channels = geo->a_channels; - memcpy(ieee->geo.bg, geo->bg, geo->bg_channels * - sizeof(struct ieee80211_channel)); - memcpy(ieee->geo.a, geo->a, ieee->geo.a_channels * - sizeof(struct ieee80211_channel)); - return 0; -} - -static const struct ieee80211_geo *ipw_get_geo(struct ieee80211_device *ieee) -{ - return &ieee->geo; -} - #define MAX_HW_RESTARTS 5 static int ipw_up(struct ipw_priv *priv) { @@ -10732,14 +10674,11 @@ static int ipw_up(struct ipw_priv *priv) priv->eeprom[EEPROM_COUNTRY_CODE + 2]); j = 0; } - if (ipw_set_geo(priv->ieee, &ipw_geos[j])) { + if (ieee80211_set_geo(priv->ieee, &ipw_geos[j])) { IPW_WARNING("Could not set geography."); return 0; } - IPW_DEBUG_INFO("Geography %03d [%s] detected.\n", - j, priv->ieee->geo.name); - if (priv->status & STATUS_RF_KILL_SW) { IPW_WARNING("Radio disabled by module parameter.\n"); return 0; @@ -10782,9 +10721,9 @@ static int ipw_up(struct ipw_priv *priv) static void ipw_bg_up(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_up(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } static void ipw_deinit(struct ipw_priv *priv) @@ -10853,23 +10792,23 @@ static void ipw_down(struct ipw_priv *priv) static void ipw_bg_down(void *data) { struct ipw_priv *priv = data; - down(&priv->sem); + mutex_lock(&priv->mutex); ipw_down(data); - up(&priv->sem); + mutex_unlock(&priv->mutex); } /* Called by register_netdev() */ static int ipw_net_init(struct net_device *dev) { struct ipw_priv *priv = ieee80211_priv(dev); - down(&priv->sem); + mutex_lock(&priv->mutex); if (ipw_up(priv)) { - up(&priv->sem); + mutex_unlock(&priv->mutex); return -EIO; } - up(&priv->sem); + mutex_unlock(&priv->mutex); return 0; } @@ -10959,7 +10898,7 @@ static int ipw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < IPW_IBSS_MAC_HASH_SIZE; i++) INIT_LIST_HEAD(&priv->ibss_mac_hash[i]); - init_MUTEX(&priv->sem); + mutex_init(&priv->mutex); if (pci_enable_device(pdev)) { err = -ENODEV; goto out_free_ieee80211; @@ -11017,7 +10956,7 @@ static int ipw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) SET_MODULE_OWNER(net_dev); SET_NETDEV_DEV(net_dev, &pdev->dev); - down(&priv->sem); + mutex_lock(&priv->mutex); priv->ieee->hard_start_xmit = ipw_net_hard_start_xmit; priv->ieee->set_security = shim__set_security; @@ -11050,16 +10989,22 @@ static int ipw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = sysfs_create_group(&pdev->dev.kobj, &ipw_attribute_group); if (err) { IPW_ERROR("failed to create sysfs device attributes\n"); - up(&priv->sem); + mutex_unlock(&priv->mutex); goto out_release_irq; } - up(&priv->sem); + mutex_unlock(&priv->mutex); err = register_netdev(net_dev); if (err) { IPW_ERROR("failed to register network device\n"); goto out_remove_sysfs; } + + printk(KERN_INFO DRV_NAME ": Detected geography %s (%d 802.11bg " + "channels, %d 802.11a channels)\n", + priv->ieee->geo.name, priv->ieee->geo.bg_channels, + priv->ieee->geo.a_channels); + return 0; out_remove_sysfs: @@ -11091,13 +11036,13 @@ static void ipw_pci_remove(struct pci_dev *pdev) if (!priv) return; - down(&priv->sem); + mutex_lock(&priv->mutex); priv->status |= STATUS_EXIT_PENDING; ipw_down(priv); sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group); - up(&priv->sem); + mutex_unlock(&priv->mutex); unregister_netdev(priv->net_dev); @@ -11250,8 +11195,10 @@ MODULE_PARM_DESC(auto_create, "auto create adhoc network (default on)"); module_param(led, int, 0444); MODULE_PARM_DESC(led, "enable led control on some systems (default 0 off)\n"); +#ifdef CONFIG_IPW2200_DEBUG module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "debug output mask"); +#endif module_param(channel, int, 0444); MODULE_PARM_DESC(channel, "channel to limit associate to (default 0 [ANY])"); @@ -11281,12 +11228,18 @@ module_param(mode, int, 0444); MODULE_PARM_DESC(mode, "network mode (0=BSS,1=IBSS)"); #endif +module_param(bt_coexist, int, 0444); +MODULE_PARM_DESC(bt_coexist, "enable bluetooth coexistence (default off)"); + module_param(hwcrypto, int, 0444); -MODULE_PARM_DESC(hwcrypto, "enable hardware crypto (default on)"); +MODULE_PARM_DESC(hwcrypto, "enable hardware crypto (default off)"); module_param(cmdlog, int, 0444); MODULE_PARM_DESC(cmdlog, "allocate a ring buffer for logging firmware commands"); +module_param(roaming, int, 0444); +MODULE_PARM_DESC(roaming, "enable roaming support (default on)"); + module_exit(ipw_exit); module_init(ipw_init); diff --git a/drivers/net/wireless/ipw2200.h b/drivers/net/wireless/ipw2200.h index e65620a..4b98049 100644 --- a/drivers/net/wireless/ipw2200.h +++ b/drivers/net/wireless/ipw2200.h @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -33,6 +33,7 @@ #include <linux/moduleparam.h> #include <linux/config.h> #include <linux/init.h> +#include <linux/mutex.h> #include <linux/pci.h> #include <linux/netdevice.h> @@ -46,6 +47,7 @@ #include <linux/firmware.h> #include <linux/wireless.h> #include <linux/dma-mapping.h> +#include <linux/jiffies.h> #include <asm/io.h> #include <net/ieee80211.h> @@ -244,8 +246,10 @@ enum connection_manager_assoc_states { #define HOST_NOTIFICATION_S36_MEASUREMENT_REFUSED 31 #define HOST_NOTIFICATION_STATUS_BEACON_MISSING 1 -#define IPW_MB_DISASSOCIATE_THRESHOLD_DEFAULT 24 +#define IPW_MB_ROAMING_THRESHOLD_MIN 1 #define IPW_MB_ROAMING_THRESHOLD_DEFAULT 8 +#define IPW_MB_ROAMING_THRESHOLD_MAX 30 +#define IPW_MB_DISASSOCIATE_THRESHOLD_DEFAULT 3*IPW_MB_ROAMING_THRESHOLD_DEFAULT #define IPW_REAL_RATE_RX_PACKET_THRESHOLD 300 #define MACADRR_BYTE_LEN 6 @@ -616,13 +620,16 @@ struct notif_tgi_tx_key { u8 reserved; } __attribute__ ((packed)); +#define SILENCE_OVER_THRESH (1) +#define SILENCE_UNDER_THRESH (2) + struct notif_link_deterioration { struct ipw_cmd_stats stats; u8 rate; u8 modulation; struct rate_histogram histogram; - u8 reserved1; - u16 reserved2; + u8 silence_notification_type; /* SILENCE_OVER/UNDER_THRESH */ + u16 silence_count; } __attribute__ ((packed)); struct notif_association { @@ -780,7 +787,7 @@ struct ipw_sys_config { u8 enable_cts_to_self; u8 enable_multicast_filtering; u8 bt_coexist_collision_thr; - u8 reserved2; + u8 silence_threshold; u8 accept_all_mgmt_bcpr; u8 accept_all_mgtm_frames; u8 pass_noise_stats_to_host; @@ -852,7 +859,7 @@ struct ipw_scan_request_ext { u16 dwell_time[IPW_SCAN_TYPES]; } __attribute__ ((packed)); -extern inline u8 ipw_get_scan_type(struct ipw_scan_request_ext *scan, u8 index) +static inline u8 ipw_get_scan_type(struct ipw_scan_request_ext *scan, u8 index) { if (index % 2) return scan->scan_type[index / 2] & 0x0F; @@ -860,7 +867,7 @@ extern inline u8 ipw_get_scan_type(struct ipw_scan_request_ext *scan, u8 index) return (scan->scan_type[index / 2] & 0xF0) >> 4; } -extern inline void ipw_set_scan_type(struct ipw_scan_request_ext *scan, +static inline void ipw_set_scan_type(struct ipw_scan_request_ext *scan, u8 index, u8 scan_type) { if (index % 2) @@ -1120,7 +1127,7 @@ struct ipw_priv { struct ieee80211_device *ieee; spinlock_t lock; - struct semaphore sem; + struct mutex mutex; /* basic pci-network driver stuff */ struct pci_dev *pci_dev; @@ -1406,13 +1413,6 @@ do { if (ipw_debug_level & (level)) \ * Register bit definitions */ -/* Dino control registers bits */ - -#define DINO_ENABLE_SYSTEM 0x80 -#define DINO_ENABLE_CS 0x40 -#define DINO_RXFIFO_DATA 0x01 -#define DINO_CONTROL_REG 0x00200000 - #define IPW_INTA_RW 0x00000008 #define IPW_INTA_MASK_R 0x0000000C #define IPW_INDIRECT_ADDR 0x00000010 @@ -1459,6 +1459,11 @@ do { if (ipw_debug_level & (level)) \ #define IPW_DOMAIN_0_END 0x1000 #define CLX_MEM_BAR_SIZE 0x1000 +/* Dino/baseband control registers bits */ + +#define DINO_ENABLE_SYSTEM 0x80 /* 1 = baseband processor on, 0 = reset */ +#define DINO_ENABLE_CS 0x40 /* 1 = enable ucode load */ +#define DINO_RXFIFO_DATA 0x01 /* 1 = data available */ #define IPW_BASEBAND_CONTROL_STATUS 0X00200000 #define IPW_BASEBAND_TX_FIFO_WRITE 0X00200004 #define IPW_BASEBAND_RX_FIFO_READ 0X00200004 @@ -1567,13 +1572,18 @@ do { if (ipw_debug_level & (level)) \ #define EEPROM_BSS_CHANNELS_BG (GET_EEPROM_ADDR(0x2c,LSB)) /* 2 bytes */ #define EEPROM_HW_VERSION (GET_EEPROM_ADDR(0x72,LSB)) /* 2 bytes */ -/* NIC type as found in the one byte EEPROM_NIC_TYPE offset*/ +/* NIC type as found in the one byte EEPROM_NIC_TYPE offset */ #define EEPROM_NIC_TYPE_0 0 #define EEPROM_NIC_TYPE_1 1 #define EEPROM_NIC_TYPE_2 2 #define EEPROM_NIC_TYPE_3 3 #define EEPROM_NIC_TYPE_4 4 +/* Bluetooth Coexistence capabilities as found in EEPROM_SKU_CAPABILITY */ +#define EEPROM_SKU_CAP_BT_CHANNEL_SIG 0x01 /* we can tell BT our channel # */ +#define EEPROM_SKU_CAP_BT_PRIORITY 0x02 /* BT can take priority over us */ +#define EEPROM_SKU_CAP_BT_OOB 0x04 /* we can signal BT out-of-band */ + #define FW_MEM_REG_LOWER_BOUND 0x00300000 #define FW_MEM_REG_EEPROM_ACCESS (FW_MEM_REG_LOWER_BOUND + 0x40) #define IPW_EVENT_REG (FW_MEM_REG_LOWER_BOUND + 0x04) @@ -1658,9 +1668,10 @@ enum { IPW_FW_ERROR_FATAL_ERROR }; -#define AUTH_OPEN 0 -#define AUTH_SHARED_KEY 1 -#define AUTH_IGNORE 3 +#define AUTH_OPEN 0 +#define AUTH_SHARED_KEY 1 +#define AUTH_LEAP 2 +#define AUTH_IGNORE 3 #define HC_ASSOCIATE 0 #define HC_REASSOCIATE 1 @@ -1860,7 +1871,7 @@ struct host_cmd { u8 cmd; u8 len; u16 reserved; - u32 param[TFD_CMD_IMMEDIATE_PAYLOAD_LENGTH]; + u32 *param; } __attribute__ ((packed)); struct ipw_cmd_log { @@ -1869,21 +1880,24 @@ struct ipw_cmd_log { struct host_cmd cmd; }; -#define CFG_BT_COEXISTENCE_MIN 0x00 -#define CFG_BT_COEXISTENCE_DEFER 0x02 -#define CFG_BT_COEXISTENCE_KILL 0x04 -#define CFG_BT_COEXISTENCE_WME_OVER_BT 0x08 -#define CFG_BT_COEXISTENCE_OOB 0x10 -#define CFG_BT_COEXISTENCE_MAX 0xFF -#define CFG_BT_COEXISTENCE_DEF 0x80 /* read Bt from EEPROM */ - -#define CFG_CTS_TO_ITSELF_ENABLED_MIN 0x0 -#define CFG_CTS_TO_ITSELF_ENABLED_MAX 0x1 +/* SysConfig command parameters ... */ +/* bt_coexistence param */ +#define CFG_BT_COEXISTENCE_SIGNAL_CHNL 0x01 /* tell BT our chnl # */ +#define CFG_BT_COEXISTENCE_DEFER 0x02 /* defer our Tx if BT traffic */ +#define CFG_BT_COEXISTENCE_KILL 0x04 /* kill our Tx if BT traffic */ +#define CFG_BT_COEXISTENCE_WME_OVER_BT 0x08 /* multimedia extensions */ +#define CFG_BT_COEXISTENCE_OOB 0x10 /* signal BT via out-of-band */ + +/* clear-to-send to self param */ +#define CFG_CTS_TO_ITSELF_ENABLED_MIN 0x00 +#define CFG_CTS_TO_ITSELF_ENABLED_MAX 0x01 #define CFG_CTS_TO_ITSELF_ENABLED_DEF CFG_CTS_TO_ITSELF_ENABLED_MIN -#define CFG_SYS_ANTENNA_BOTH 0x000 -#define CFG_SYS_ANTENNA_A 0x001 -#define CFG_SYS_ANTENNA_B 0x003 +/* Antenna diversity param (h/w can select best antenna, based on signal) */ +#define CFG_SYS_ANTENNA_BOTH 0x00 /* NIC selects best antenna */ +#define CFG_SYS_ANTENNA_A 0x01 /* force antenna A */ +#define CFG_SYS_ANTENNA_B 0x03 /* force antenna B */ +#define CFG_SYS_ANTENNA_SLOW_DIV 0x02 /* consider background noise */ /* * The definitions below were lifted off the ipw2100 driver, which only @@ -1899,27 +1913,4 @@ struct ipw_cmd_log { #define IPW_MAX_CONFIG_RETRIES 10 -static inline u32 frame_hdr_len(struct ieee80211_hdr_4addr *hdr) -{ - u32 retval; - u16 fc; - - retval = sizeof(struct ieee80211_hdr_3addr); - fc = le16_to_cpu(hdr->frame_ctl); - - /* - * Function ToDS FromDS - * IBSS 0 0 - * To AP 1 0 - * From AP 0 1 - * WDS (bridge) 1 1 - * - * Only WDS frames use Address4 among them. --YZ - */ - if (!(fc & IEEE80211_FCTL_TODS) || !(fc & IEEE80211_FCTL_FROMDS)) - retval -= ETH_ALEN; - - return retval; -} - #endif /* __ipw2200_h__ */ diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index bf6271e..75ce6dd 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -55,10 +55,8 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/bitops.h> -#ifdef CONFIG_NET_RADIO #include <linux/wireless.h> #include <net/iw_handler.h> -#endif #include <pcmcia/cs_types.h> #include <pcmcia/cs.h> diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 18baacf..18a4458 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -112,7 +112,7 @@ static const char StripVersion[] = "1.3A-STUART.CHESHIRE"; #include <linux/ip.h> #include <linux/tcp.h> #include <linux/time.h> - +#include <linux/jiffies.h> /************************************************************************/ /* Useful structures and definitions */ @@ -1569,7 +1569,7 @@ static int strip_xmit(struct sk_buff *skb, struct net_device *dev) del_timer(&strip_info->idle_timer); - if (jiffies - strip_info->pps_timer > HZ) { + if (time_after(jiffies, strip_info->pps_timer + HZ)) { unsigned long t = jiffies - strip_info->pps_timer; unsigned long rx_pps_count = (strip_info->rx_pps_count * HZ * 8 + t / 2) / t; unsigned long tx_pps_count = (strip_info->tx_pps_count * HZ * 8 + t / 2) / t; diff --git a/drivers/net/wireless/wavelan.p.h b/drivers/net/wireless/wavelan.p.h index 166e28b..5cb0bc8 100644 --- a/drivers/net/wireless/wavelan.p.h +++ b/drivers/net/wireless/wavelan.p.h @@ -98,11 +98,7 @@ * characteristics of the hardware. Applications such as mobile IP may * take advantage of it. * - * You will need to enable the CONFIG_NET_RADIO define in the kernel - * configuration to enable the wireless extensions (this is the one - * giving access to the radio network device choice). - * - * It might also be a good idea as well to fetch the wireless tools to + * It might be a good idea as well to fetch the wireless tools to * configure the device and play a bit. */ diff --git a/drivers/net/wireless/wavelan_cs.p.h b/drivers/net/wireless/wavelan_cs.p.h index f2d5975..451f627 100644 --- a/drivers/net/wireless/wavelan_cs.p.h +++ b/drivers/net/wireless/wavelan_cs.p.h @@ -99,11 +99,7 @@ * caracteristics of the hardware in a standard way and support for * applications for taking advantage of it (like Mobile IP). * - * You will need to enable the CONFIG_NET_RADIO define in the kernel - * configuration to enable the wireless extensions (this is the one - * giving access to the radio network device choice). - * - * It might also be a good idea as well to fetch the wireless tools to + * It might be a good idea as well to fetch the wireless tools to * configure the device and play a bit. */ @@ -440,11 +436,8 @@ #include <linux/ioport.h> #include <linux/fcntl.h> #include <linux/ethtool.h> - -#ifdef CONFIG_NET_RADIO #include <linux/wireless.h> /* Wireless extensions */ #include <net/iw_handler.h> /* New driver API */ -#endif /* Pcmcia headers that we need */ #include <pcmcia/cs_types.h> diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index 1c25065..75d56bf 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -69,8 +69,8 @@ static int fifo_cfg = 0x0020; /* Bypass external Tx FIFO. */ static int dma_ctrl = 0x00CAC277; /* Override when loading module! */ static int fifo_cfg = 0x0028; #else -static int dma_ctrl = 0x004A0263; /* Constrained by errata */ -static int fifo_cfg = 0x0020; /* Bypass external Tx FIFO. */ +static const int dma_ctrl = 0x004A0263; /* Constrained by errata */ +static const int fifo_cfg = 0x0020; /* Bypass external Tx FIFO. */ #endif /* Set the copy breakpoint for the copy-only-tiny-frames scheme. @@ -266,7 +266,7 @@ struct pci_id_info { int drv_flags; /* Driver use, intended as capability flags. */ }; -static struct pci_id_info pci_id_tbl[] = { +static const struct pci_id_info pci_id_tbl[] = { {"Yellowfin G-NIC Gigabit Ethernet", { 0x07021000, 0xffffffff}, PCI_IOTYPE, YELLOWFIN_SIZE, FullTxStatus | IsGigabit | HasMulticastBug | HasMACAddrBug | DontUseEeprom}, diff --git a/drivers/net/zorro8390.c b/drivers/net/zorro8390.c index 8ab6e12..7610216 100644 --- a/drivers/net/zorro8390.c +++ b/drivers/net/zorro8390.c @@ -27,6 +27,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/zorro.h> +#include <linux/jiffies.h> #include <asm/system.h> #include <asm/irq.h> @@ -151,7 +152,7 @@ static int __devinit zorro8390_init(struct net_device *dev, z_writeb(z_readb(ioaddr + NE_RESET), ioaddr + NE_RESET); while ((z_readb(ioaddr + NE_EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk(KERN_WARNING " not found (no reset ack).\n"); return -ENODEV; } @@ -273,7 +274,7 @@ static void zorro8390_reset_8390(struct net_device *dev) /* This check _should_not_ be necessary, omit eventually. */ while ((z_readb(NE_BASE+NE_EN0_ISR) & ENISR_RESET) == 0) - if (jiffies - reset_start_time > 2*HZ/100) { + if (time_after(jiffies, reset_start_time + 2*HZ/100)) { printk(KERN_WARNING "%s: ne_reset_8390() did not complete.\n", dev->name); break; @@ -400,7 +401,7 @@ static void zorro8390_block_output(struct net_device *dev, int count, dma_start = jiffies; while ((z_readb(NE_BASE + NE_EN0_ISR) & ENISR_RDC) == 0) - if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ + if (time_after(jiffies, dma_start + 2*HZ/100)) { /* 20ms */ printk(KERN_ERR "%s: timeout waiting for Tx RDC.\n", dev->name); zorro8390_reset_8390(dev); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 2e727f4..4413325 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -273,7 +273,7 @@ removeseg: list_del(&dev_info->lh); del_gendisk(dev_info->gd); - blk_put_queue(dev_info->dcssblk_queue); + blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(dev); @@ -491,7 +491,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char unregister_dev: PRINT_ERR("device_create_file() failed!\n"); list_del(&dev_info->lh); - blk_put_queue(dev_info->dcssblk_queue); + blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(&dev_info->dev); @@ -505,7 +505,7 @@ list_del: unload_seg: segment_unload(local_buf); dealloc_gendisk: - blk_put_queue(dev_info->dcssblk_queue); + blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); free_dev_info: @@ -562,7 +562,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch list_del(&dev_info->lh); del_gendisk(dev_info->gd); - blk_put_queue(dev_info->dcssblk_queue); + blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(&dev_info->dev); diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 1c8b612..3e156e0 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -440,7 +440,8 @@ static int __init bbc_i2c_init(void) struct linux_ebus_device *edev = NULL; int err, index = 0; - if (tlb_type != cheetah || !bbc_present()) + if ((tlb_type != cheetah && tlb_type != cheetah_plus) || + !bbc_present()) return -ENODEV; for_each_ebus(ebus) { @@ -486,3 +487,4 @@ static void bbc_i2c_cleanup(void) module_init(bbc_i2c_init); module_exit(bbc_i2c_cleanup); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index b3c561a..89e5413 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -582,6 +582,13 @@ config SERIAL_SUNSAB_CONSOLE on your Sparc system as the console, you can do so by answering Y to this option. +config SERIAL_SUNHV + bool "Sun4v Hypervisor Console support" + depends on SPARC64 + help + This driver supports the console device found on SUN4V Sparc + systems. Say Y if you want to be able to use this device. + config SERIAL_IP22_ZILOG tristate "IP22 Zilog8530 serial support" depends on SGI_IP22 diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index eaf8e01..50c221a 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_SERIAL_PXA) += pxa.o obj-$(CONFIG_SERIAL_SA1100) += sa1100.o obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o obj-$(CONFIG_SERIAL_SUNCORE) += suncore.o +obj-$(CONFIG_SERIAL_SUNHV) += sunhv.o obj-$(CONFIG_SERIAL_SUNZILOG) += sunzilog.o obj-$(CONFIG_SERIAL_IP22_ZILOG) += ip22zilog.o obj-$(CONFIG_SERIAL_SUNSU) += sunsu.o diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c new file mode 100644 index 0000000..f137804 --- /dev/null +++ b/drivers/serial/sunhv.c @@ -0,0 +1,550 @@ +/* sunhv.c: Serial driver for SUN4V hypervisor console. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/major.h> +#include <linux/circ_buf.h> +#include <linux/serial.h> +#include <linux/sysrq.h> +#include <linux/console.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/init.h> + +#include <asm/hypervisor.h> +#include <asm/spitfire.h> +#include <asm/vdev.h> +#include <asm/oplib.h> +#include <asm/irq.h> + +#if defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include <linux/serial_core.h> + +#include "suncore.h" + +#define CON_BREAK ((long)-1) +#define CON_HUP ((long)-2) + +static inline long hypervisor_con_getchar(long *status) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + register unsigned long arg1 asm("%o1"); + + func = HV_FAST_CONS_GETCHAR; + arg0 = 0; + arg1 = 0; + __asm__ __volatile__("ta %6" + : "=&r" (func), "=&r" (arg0), "=&r" (arg1) + : "0" (func), "1" (arg0), "2" (arg1), + "i" (HV_FAST_TRAP)); + + *status = arg0; + + return (long) arg1; +} + +static inline long hypervisor_con_putchar(long ch) +{ + register unsigned long func asm("%o5"); + register unsigned long arg0 asm("%o0"); + + func = HV_FAST_CONS_PUTCHAR; + arg0 = ch; + __asm__ __volatile__("ta %4" + : "=&r" (func), "=&r" (arg0) + : "0" (func), "1" (arg0), "i" (HV_FAST_TRAP)); + + return (long) arg0; +} + +#define IGNORE_BREAK 0x1 +#define IGNORE_ALL 0x2 + +static int hung_up = 0; + +static struct tty_struct *receive_chars(struct uart_port *port, struct pt_regs *regs) +{ + struct tty_struct *tty = NULL; + int saw_console_brk = 0; + int limit = 10000; + + if (port->info != NULL) /* Unopened serial console */ + tty = port->info->tty; + + while (limit-- > 0) { + long status; + long c = hypervisor_con_getchar(&status); + unsigned char flag; + + if (status == HV_EWOULDBLOCK) + break; + + if (c == CON_BREAK) { + if (uart_handle_break(port)) + continue; + saw_console_brk = 1; + c = 0; + } + + if (c == CON_HUP) { + hung_up = 1; + uart_handle_dcd_change(port, 0); + } else if (hung_up) { + hung_up = 0; + uart_handle_dcd_change(port, 1); + } + + if (tty == NULL) { + uart_handle_sysrq_char(port, c, regs); + continue; + } + + flag = TTY_NORMAL; + port->icount.rx++; + if (c == CON_BREAK) { + port->icount.brk++; + if (uart_handle_break(port)) + continue; + flag = TTY_BREAK; + } + + if (uart_handle_sysrq_char(port, c, regs)) + continue; + + if ((port->ignore_status_mask & IGNORE_ALL) || + ((port->ignore_status_mask & IGNORE_BREAK) && + (c == CON_BREAK))) + continue; + + tty_insert_flip_char(tty, c, flag); + } + + if (saw_console_brk) + sun_do_break(); + + return tty; +} + +static void transmit_chars(struct uart_port *port) +{ + struct circ_buf *xmit; + + if (!port->info) + return; + + xmit = &port->info->xmit; + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return; + + while (!uart_circ_empty(xmit)) { + long status = hypervisor_con_putchar(xmit->buf[xmit->tail]); + + if (status != HV_EOK) + break; + + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); +} + +static irqreturn_t sunhv_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct uart_port *port = dev_id; + struct tty_struct *tty; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + tty = receive_chars(port, regs); + transmit_chars(port); + spin_unlock_irqrestore(&port->lock, flags); + + if (tty) + tty_flip_buffer_push(tty); + + return IRQ_HANDLED; +} + +/* port->lock is not held. */ +static unsigned int sunhv_tx_empty(struct uart_port *port) +{ + /* Transmitter is always empty for us. If the circ buffer + * is non-empty or there is an x_char pending, our caller + * will do the right thing and ignore what we return here. + */ + return TIOCSER_TEMT; +} + +/* port->lock held by caller. */ +static void sunhv_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + return; +} + +/* port->lock is held by caller and interrupts are disabled. */ +static unsigned int sunhv_get_mctrl(struct uart_port *port) +{ + return TIOCM_DSR | TIOCM_CAR | TIOCM_CTS; +} + +/* port->lock held by caller. */ +static void sunhv_stop_tx(struct uart_port *port) +{ + return; +} + +/* port->lock held by caller. */ +static void sunhv_start_tx(struct uart_port *port) +{ + struct circ_buf *xmit = &port->info->xmit; + + while (!uart_circ_empty(xmit)) { + long status = hypervisor_con_putchar(xmit->buf[xmit->tail]); + + if (status != HV_EOK) + break; + + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } +} + +/* port->lock is not held. */ +static void sunhv_send_xchar(struct uart_port *port, char ch) +{ + unsigned long flags; + int limit = 10000; + + spin_lock_irqsave(&port->lock, flags); + + while (limit-- > 0) { + long status = hypervisor_con_putchar(ch); + if (status == HV_EOK) + break; + } + + spin_unlock_irqrestore(&port->lock, flags); +} + +/* port->lock held by caller. */ +static void sunhv_stop_rx(struct uart_port *port) +{ +} + +/* port->lock held by caller. */ +static void sunhv_enable_ms(struct uart_port *port) +{ +} + +/* port->lock is not held. */ +static void sunhv_break_ctl(struct uart_port *port, int break_state) +{ + if (break_state) { + unsigned long flags; + int limit = 1000000; + + spin_lock_irqsave(&port->lock, flags); + + while (limit-- > 0) { + long status = hypervisor_con_putchar(CON_BREAK); + if (status == HV_EOK) + break; + udelay(2); + } + + spin_unlock_irqrestore(&port->lock, flags); + } +} + +/* port->lock is not held. */ +static int sunhv_startup(struct uart_port *port) +{ + return 0; +} + +/* port->lock is not held. */ +static void sunhv_shutdown(struct uart_port *port) +{ +} + +/* port->lock is not held. */ +static void sunhv_set_termios(struct uart_port *port, struct termios *termios, + struct termios *old) +{ + unsigned int baud = uart_get_baud_rate(port, termios, old, 0, 4000000); + unsigned int quot = uart_get_divisor(port, baud); + unsigned int iflag, cflag; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + iflag = termios->c_iflag; + cflag = termios->c_cflag; + + port->ignore_status_mask = 0; + if (iflag & IGNBRK) + port->ignore_status_mask |= IGNORE_BREAK; + if ((cflag & CREAD) == 0) + port->ignore_status_mask |= IGNORE_ALL; + + /* XXX */ + uart_update_timeout(port, cflag, + (port->uartclk / (16 * quot))); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *sunhv_type(struct uart_port *port) +{ + return "SUN4V HCONS"; +} + +static void sunhv_release_port(struct uart_port *port) +{ +} + +static int sunhv_request_port(struct uart_port *port) +{ + return 0; +} + +static void sunhv_config_port(struct uart_port *port, int flags) +{ +} + +static int sunhv_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + return -EINVAL; +} + +static struct uart_ops sunhv_pops = { + .tx_empty = sunhv_tx_empty, + .set_mctrl = sunhv_set_mctrl, + .get_mctrl = sunhv_get_mctrl, + .stop_tx = sunhv_stop_tx, + .start_tx = sunhv_start_tx, + .send_xchar = sunhv_send_xchar, + .stop_rx = sunhv_stop_rx, + .enable_ms = sunhv_enable_ms, + .break_ctl = sunhv_break_ctl, + .startup = sunhv_startup, + .shutdown = sunhv_shutdown, + .set_termios = sunhv_set_termios, + .type = sunhv_type, + .release_port = sunhv_release_port, + .request_port = sunhv_request_port, + .config_port = sunhv_config_port, + .verify_port = sunhv_verify_port, +}; + +static struct uart_driver sunhv_reg = { + .owner = THIS_MODULE, + .driver_name = "serial", + .devfs_name = "tts/", + .dev_name = "ttyS", + .major = TTY_MAJOR, +}; + +static struct uart_port *sunhv_port; + +static inline void sunhv_console_putchar(struct uart_port *port, char c) +{ + unsigned long flags; + int limit = 1000000; + + spin_lock_irqsave(&port->lock, flags); + + while (limit-- > 0) { + long status = hypervisor_con_putchar(c); + if (status == HV_EOK) + break; + udelay(2); + } + + spin_unlock_irqrestore(&port->lock, flags); +} + +static void sunhv_console_write(struct console *con, const char *s, unsigned n) +{ + struct uart_port *port = sunhv_port; + int i; + + for (i = 0; i < n; i++) { + if (*s == '\n') + sunhv_console_putchar(port, '\r'); + sunhv_console_putchar(port, *s++); + } +} + +static struct console sunhv_console = { + .name = "ttyHV", + .write = sunhv_console_write, + .device = uart_console_device, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &sunhv_reg, +}; + +static inline struct console *SUNHV_CONSOLE(void) +{ + if (con_is_present()) + return NULL; + + sunhv_console.index = 0; + + return &sunhv_console; +} + +static int __init hv_console_compatible(char *buf, int len) +{ + while (len) { + int this_len; + + if (!strcmp(buf, "qcn")) + return 1; + + this_len = strlen(buf) + 1; + + buf += this_len; + len -= this_len; + } + + return 0; +} + +static unsigned int __init get_interrupt(void) +{ + const char *cons_str = "console"; + const char *compat_str = "compatible"; + int node = prom_getchild(sun4v_vdev_root); + char buf[64]; + int err, len; + + node = prom_searchsiblings(node, cons_str); + if (!node) + return 0; + + len = prom_getproplen(node, compat_str); + if (len == 0 || len == -1) + return 0; + + err = prom_getproperty(node, compat_str, buf, 64); + if (err == -1) + return 0; + + if (!hv_console_compatible(buf, len)) + return 0; + + /* Ok, the this is the OBP node for the sun4v hypervisor + * console device. Decode the interrupt. + */ + return sun4v_vdev_device_interrupt(node); +} + +static int __init sunhv_init(void) +{ + struct uart_port *port; + int ret; + + if (tlb_type != hypervisor) + return -ENODEV; + + port = kmalloc(sizeof(struct uart_port), GFP_KERNEL); + if (unlikely(!port)) + return -ENOMEM; + + memset(port, 0, sizeof(struct uart_port)); + + port->line = 0; + port->ops = &sunhv_pops; + port->type = PORT_SUNHV; + port->uartclk = ( 29491200 / 16 ); /* arbitrary */ + + /* Set this just to make uart_configure_port() happy. */ + port->membase = (unsigned char __iomem *) __pa(port); + + port->irq = get_interrupt(); + if (!port->irq) { + kfree(port); + return -ENODEV; + } + + sunhv_reg.minor = sunserial_current_minor; + sunhv_reg.nr = 1; + + ret = uart_register_driver(&sunhv_reg); + if (ret < 0) { + printk(KERN_ERR "SUNHV: uart_register_driver() failed %d\n", + ret); + kfree(port); + + return ret; + } + + sunhv_reg.tty_driver->name_base = sunhv_reg.minor - 64; + sunserial_current_minor += 1; + + sunhv_reg.cons = SUNHV_CONSOLE(); + + sunhv_port = port; + + ret = uart_add_one_port(&sunhv_reg, port); + if (ret < 0) { + printk(KERN_ERR "SUNHV: uart_add_one_port() failed %d\n", ret); + sunserial_current_minor -= 1; + uart_unregister_driver(&sunhv_reg); + kfree(port); + sunhv_port = NULL; + return -ENODEV; + } + + if (request_irq(port->irq, sunhv_interrupt, + SA_SHIRQ, "serial(sunhv)", port)) { + printk(KERN_ERR "sunhv: Cannot register IRQ\n"); + uart_remove_one_port(&sunhv_reg, port); + sunserial_current_minor -= 1; + uart_unregister_driver(&sunhv_reg); + kfree(port); + sunhv_port = NULL; + return -ENODEV; + } + + return 0; +} + +static void __exit sunhv_exit(void) +{ + struct uart_port *port = sunhv_port; + + BUG_ON(!port); + + free_irq(port->irq, port); + + uart_remove_one_port(&sunhv_reg, port); + sunserial_current_minor -= 1; + + uart_unregister_driver(&sunhv_reg); + + kfree(sunhv_port); + sunhv_port = NULL; +} + +module_init(sunhv_init); +module_exit(sunhv_exit); + +MODULE_AUTHOR("David S. Miller"); +MODULE_DESCRIPTION("SUN4V Hypervisor console driver") +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index 8566422..a2fb0c2 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -955,14 +955,13 @@ static struct console sunsab_console = { .index = -1, .data = &sunsab_reg, }; -#define SUNSAB_CONSOLE (&sunsab_console) -static void __init sunsab_console_init(void) +static inline struct console *SUNSAB_CONSOLE(void) { int i; if (con_is_present()) - return; + return NULL; for (i = 0; i < num_channels; i++) { int this_minor = sunsab_reg.minor + i; @@ -971,13 +970,14 @@ static void __init sunsab_console_init(void) break; } if (i == num_channels) - return; + return NULL; sunsab_console.index = i; - register_console(&sunsab_console); + + return &sunsab_console; } #else -#define SUNSAB_CONSOLE (NULL) +#define SUNSAB_CONSOLE() (NULL) #define sunsab_console_init() do { } while (0) #endif @@ -1124,7 +1124,6 @@ static int __init sunsab_init(void) sunsab_reg.minor = sunserial_current_minor; sunsab_reg.nr = num_channels; - sunsab_reg.cons = SUNSAB_CONSOLE; ret = uart_register_driver(&sunsab_reg); if (ret < 0) { @@ -1143,10 +1142,12 @@ static int __init sunsab_init(void) return ret; } + sunsab_reg.tty_driver->name_base = sunsab_reg.minor - 64; + + sunsab_reg.cons = SUNSAB_CONSOLE(); + sunserial_current_minor += num_channels; - sunsab_console_init(); - for (i = 0; i < num_channels; i++) { struct uart_sunsab_port *up = &sunsab_ports[i]; diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 4e453fa..46c44b8 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1280,6 +1280,7 @@ static int __init sunsu_kbd_ms_init(struct uart_sunsu_port *up, int channel) struct serio *serio; #endif + spin_lock_init(&up->port.lock); up->port.line = channel; up->port.type = PORT_UNKNOWN; up->port.uartclk = (SU_BASE_BAUD * 16); @@ -1464,18 +1465,17 @@ static struct console sunsu_cons = { .index = -1, .data = &sunsu_reg, }; -#define SUNSU_CONSOLE (&sunsu_cons) /* * Register console. */ -static int __init sunsu_serial_console_init(void) +static inline struct console *SUNSU_CONSOLE(void) { int i; if (con_is_present()) - return 0; + return NULL; for (i = 0; i < UART_NR; i++) { int this_minor = sunsu_reg.minor + i; @@ -1484,16 +1484,16 @@ static int __init sunsu_serial_console_init(void) break; } if (i == UART_NR) - return 0; + return NULL; if (sunsu_ports[i].port_node == 0) - return 0; + return NULL; sunsu_cons.index = i; - register_console(&sunsu_cons); - return 0; + + return &sunsu_cons; } #else -#define SUNSU_CONSOLE (NULL) +#define SUNSU_CONSOLE() (NULL) #define sunsu_serial_console_init() do { } while (0) #endif @@ -1510,6 +1510,7 @@ static int __init sunsu_serial_init(void) up->su_type == SU_PORT_KBD) continue; + spin_lock_init(&up->port.lock); up->port.flags |= UPF_BOOT_AUTOCONF; up->port.type = PORT_UNKNOWN; up->port.uartclk = (SU_BASE_BAUD * 16); @@ -1523,16 +1524,19 @@ static int __init sunsu_serial_init(void) } sunsu_reg.minor = sunserial_current_minor; - sunserial_current_minor += instance; sunsu_reg.nr = instance; - sunsu_reg.cons = SUNSU_CONSOLE; ret = uart_register_driver(&sunsu_reg); if (ret < 0) return ret; - sunsu_serial_console_init(); + sunsu_reg.tty_driver->name_base = sunsu_reg.minor - 64; + + sunserial_current_minor += instance; + + sunsu_reg.cons = SUNSU_CONSOLE(); + for (i = 0; i < UART_NR; i++) { struct uart_sunsu_port *up = &sunsu_ports[i]; diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index 5cc4d4c..10b35c6 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -1390,7 +1390,6 @@ static struct console sunzilog_console = { .index = -1, .data = &sunzilog_reg, }; -#define SUNZILOG_CONSOLE (&sunzilog_console) static int __init sunzilog_console_init(void) { @@ -1413,8 +1412,31 @@ static int __init sunzilog_console_init(void) register_console(&sunzilog_console); return 0; } + +static inline struct console *SUNZILOG_CONSOLE(void) +{ + int i; + + if (con_is_present()) + return NULL; + + for (i = 0; i < NUM_CHANNELS; i++) { + int this_minor = sunzilog_reg.minor + i; + + if ((this_minor - 64) == (serial_console - 1)) + break; + } + if (i == NUM_CHANNELS) + return NULL; + + sunzilog_console.index = i; + sunzilog_port_table[i].flags |= SUNZILOG_FLAG_IS_CONS; + + return &sunzilog_console; +} + #else -#define SUNZILOG_CONSOLE (NULL) +#define SUNZILOG_CONSOLE() (NULL) #define sunzilog_console_init() do { } while (0) #endif @@ -1666,14 +1688,15 @@ static int __init sunzilog_ports_init(void) } sunzilog_reg.nr = uart_count; - sunzilog_reg.cons = SUNZILOG_CONSOLE; - sunzilog_reg.minor = sunserial_current_minor; - sunserial_current_minor += uart_count; ret = uart_register_driver(&sunzilog_reg); if (ret == 0) { - sunzilog_console_init(); + sunzilog_reg.tty_driver->name_base = sunzilog_reg.minor - 64; + sunzilog_reg.cons = SUNZILOG_CONSOLE(); + + sunserial_current_minor += uart_count; + for (i = 0; i < NUM_CHANNELS; i++) { struct uart_sunzilog_port *up = &sunzilog_port_table[i]; diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile index 6f1e0e9..3adb639 100644 --- a/fs/jfs/Makefile +++ b/fs/jfs/Makefile @@ -8,7 +8,8 @@ jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ jfs_unicode.o jfs_dtree.o jfs_inode.o \ jfs_extent.o symlink.o jfs_metapage.o \ - jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o xattr.o + jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ + resize.o xattr.o ioctl.o jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 461e493..e228130 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -183,6 +183,9 @@ cleanup: posix_acl_release(acl); } else inode->i_mode &= ~current->fs->umask; + + JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | + inode->i_mode; return rc; } @@ -207,12 +210,12 @@ static int jfs_acl_chmod(struct inode *inode) rc = posix_acl_chmod_masq(clone, inode->i_mode); if (!rc) { tid_t tid = txBegin(inode->i_sb, 0); - down(&JFS_IP(inode)->commit_sem); + mutex_lock(&JFS_IP(inode)->commit_mutex); rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&JFS_IP(inode)->commit_sem); + mutex_unlock(&JFS_IP(inode)->commit_mutex); } posix_acl_release(clone); diff --git a/fs/jfs/file.c b/fs/jfs/file.c index c2c19c9..e1ac6e4 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -113,4 +113,5 @@ struct file_operations jfs_file_operations = { .sendfile = generic_file_sendfile, .fsync = jfs_fsync, .release = jfs_release, + .ioctl = jfs_ioctl, }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9f942ca..51a5fed 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -55,6 +55,7 @@ void jfs_read_inode(struct inode *inode) inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } + jfs_set_inode_flags(inode); } /* @@ -89,16 +90,16 @@ int jfs_commit_inode(struct inode *inode, int wait) } tid = txBegin(inode->i_sb, COMMIT_INODE); - down(&JFS_IP(inode)->commit_sem); + mutex_lock(&JFS_IP(inode)->commit_mutex); /* - * Retest inode state after taking commit_sem + * Retest inode state after taking commit_mutex */ if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode)) rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); txEnd(tid); - up(&JFS_IP(inode)->commit_sem); + mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; } @@ -335,18 +336,18 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) tid = txBegin(ip->i_sb, 0); /* - * The commit_sem cannot be taken before txBegin. + * The commit_mutex cannot be taken before txBegin. * txBegin may block and there is a chance the inode * could be marked dirty and need to be committed * before txBegin unblocks */ - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); newsize = xtTruncate(tid, ip, length, COMMIT_TRUNCATE | COMMIT_PWMAP); if (newsize < 0) { txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); break; } @@ -355,7 +356,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); } while (newsize > length); /* Truncate isn't always atomic */ } diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c new file mode 100644 index 0000000..67b3774 --- /dev/null +++ b/fs/jfs/ioctl.c @@ -0,0 +1,107 @@ +/* + * linux/fs/jfs/ioctl.c + * + * Copyright (C) 2006 Herbert Poetzl + * adapted from Remy Card's ext2/ioctl.c + */ + +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/ctype.h> +#include <linux/capability.h> +#include <linux/time.h> +#include <asm/current.h> +#include <asm/uaccess.h> + +#include "jfs_incore.h" +#include "jfs_dinode.h" +#include "jfs_inode.h" + + +static struct { + long jfs_flag; + long ext2_flag; +} jfs_map[] = { + {JFS_NOATIME_FL, EXT2_NOATIME_FL}, + {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL}, + {JFS_SYNC_FL, EXT2_SYNC_FL}, + {JFS_SECRM_FL, EXT2_SECRM_FL}, + {JFS_UNRM_FL, EXT2_UNRM_FL}, + {JFS_APPEND_FL, EXT2_APPEND_FL}, + {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL}, + {0, 0}, +}; + +static long jfs_map_ext2(unsigned long flags, int from) +{ + int index=0; + long mapped=0; + + while (jfs_map[index].jfs_flag) { + if (from) { + if (jfs_map[index].ext2_flag & flags) + mapped |= jfs_map[index].jfs_flag; + } else { + if (jfs_map[index].jfs_flag & flags) + mapped |= jfs_map[index].ext2_flag; + } + index++; + } + return mapped; +} + + +int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + struct jfs_inode_info *jfs_inode = JFS_IP(inode); + unsigned int flags; + + switch (cmd) { + case JFS_IOC_GETFLAGS: + flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; + flags = jfs_map_ext2(flags, 0); + return put_user(flags, (int __user *) arg); + case JFS_IOC_SETFLAGS: { + unsigned int oldflags; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + flags = jfs_map_ext2(flags, 1); + if (!S_ISDIR(inode->i_mode)) + flags &= ~JFS_DIRSYNC_FL; + + oldflags = jfs_inode->mode2; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + if ((oldflags & JFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + } + + flags = flags & JFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + jfs_inode->mode2 = flags; + + jfs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; + } + default: + return -ENOTTY; + } +} + diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 580a325..9f2572a 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -139,13 +139,36 @@ struct dinode { /* more extended mode bits: attributes for OS/2 */ #define IREADONLY 0x02000000 /* no write access to file */ -#define IARCHIVE 0x40000000 /* file archive bit */ -#define ISYSTEM 0x08000000 /* system file */ #define IHIDDEN 0x04000000 /* hidden file */ -#define IRASH 0x4E000000 /* mask for changeable attributes */ -#define INEWNAME 0x80000000 /* non-8.3 filename format */ +#define ISYSTEM 0x08000000 /* system file */ + #define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */ +#define IARCHIVE 0x40000000 /* file archive bit */ +#define INEWNAME 0x80000000 /* non-8.3 filename format */ + +#define IRASH 0x4E000000 /* mask for changeable attributes */ #define ATTRSHIFT 25 /* bits to shift to move attribute specification to mode position */ +/* extended attributes for Linux */ + +#define JFS_NOATIME_FL 0x00080000 /* do not update atime */ + +#define JFS_DIRSYNC_FL 0x00100000 /* dirsync behaviour */ +#define JFS_SYNC_FL 0x00200000 /* Synchronous updates */ +#define JFS_SECRM_FL 0x00400000 /* Secure deletion */ +#define JFS_UNRM_FL 0x00800000 /* allow for undelete */ + +#define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ +#define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ + +#define JFS_FL_USER_VISIBLE 0x03F80000 +#define JFS_FL_USER_MODIFIABLE 0x03F80000 +#define JFS_FL_INHERIT 0x03C80000 + +/* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ +#define JFS_IOC_GETFLAGS _IOR('f', 1, long) +#define JFS_IOC_SETFLAGS _IOW('f', 2, long) + + #endif /*_H_JFS_DINODE */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 79b5404..c161c98 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -64,9 +64,9 @@ * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. */ -#define BMAP_LOCK_INIT(bmp) init_MUTEX(&bmp->db_bmaplock) -#define BMAP_LOCK(bmp) down(&bmp->db_bmaplock) -#define BMAP_UNLOCK(bmp) up(&bmp->db_bmaplock) +#define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) +#define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) +#define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) /* * forward references @@ -125,7 +125,7 @@ static int dbGetL2AGSize(s64 nblocks); * into the table, with the table elements yielding the maximum * binary buddy of free bits within the character. */ -static s8 budtab[256] = { +static const s8 budtab[256] = { 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 32e2588..8b14cc8 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -243,7 +243,7 @@ struct dbmap { struct bmap { struct dbmap db_bmap; /* on-disk aggregate map descriptor */ struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ - struct semaphore db_bmaplock; /* aggregate map lock */ + struct mutex db_bmaplock; /* aggregate map lock */ atomic_t db_active[MAXAG]; /* count of active, open files in AG */ u32 *db_DBmap; }; diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 404f33e..6c3f083 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -1005,6 +1005,9 @@ static int dtSplitUp(tid_t tid, DT_PUTPAGE(smp); + if (!DO_INDEX(ip)) + ip->i_size = xlen << sbi->l2bsize; + goto freeKeyName; } @@ -1055,7 +1058,9 @@ static int dtSplitUp(tid_t tid, xaddr = addressPXD(pxd) + xlen; dbFree(ip, xaddr, (s64) n); } - } + } else if (!DO_INDEX(ip)) + ip->i_size = lengthPXD(pxd) << sbi->l2bsize; + extendOut: DT_PUTPAGE(smp); @@ -1098,6 +1103,9 @@ static int dtSplitUp(tid_t tid, goto splitOut; } + if (!DO_INDEX(ip)) + ip->i_size += PSIZE; + /* * propagate up the router entry for the leaf page just split * @@ -2424,6 +2432,9 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, break; } + if (!DO_INDEX(ip)) + ip->i_size -= PSIZE; + return 0; } diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 4879603..5549378 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -94,7 +94,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) txBeginAnon(ip->i_sb); /* Avoid race with jfs_commit_inode() */ - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* validate extent length */ if (xlen > MAXXLEN) @@ -136,14 +136,14 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) */ nxlen = xlen; if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) { - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } /* Allocate blocks to quota. */ if (DQUOT_ALLOC_BLOCK(ip, nxlen)) { dbFree(ip, nxaddr, (s64) nxlen); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } @@ -165,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) if (rc) { dbFree(ip, nxaddr, nxlen); DQUOT_FREE_BLOCK(ip, nxlen); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } @@ -177,7 +177,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) mark_inode_dirty(ip); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); /* * COMMIT_SyncList flags an anonymous tlock on page that is on * sync list. @@ -222,7 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) /* This blocks if we are low on resources */ txBeginAnon(ip->i_sb); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* validate extent length */ if (nxlen > MAXXLEN) nxlen = MAXXLEN; @@ -258,7 +258,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) /* Allocat blocks to quota. */ if (DQUOT_ALLOC_BLOCK(ip, nxlen)) { dbFree(ip, nxaddr, (s64) nxlen); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return -EDQUOT; } @@ -338,7 +338,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) mark_inode_dirty(ip); exit: - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } #endif /* _NOTYET */ @@ -439,12 +439,12 @@ int extRecord(struct inode *ip, xad_t * xp) txBeginAnon(ip->i_sb); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* update the extent */ rc = xtUpdate(0, ip, xp); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return rc; } diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 4efa0d0..ccbe60a 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -66,14 +66,14 @@ static HLIST_HEAD(aggregate_hash); * imap locks */ /* iag free list lock */ -#define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) -#define IAGFREE_LOCK(imap) down(&imap->im_freelock) -#define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) +#define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) +#define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) +#define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) /* per ag iag list locks */ -#define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) -#define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) -#define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) +#define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) +#define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) +#define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) /* * forward references @@ -1261,7 +1261,7 @@ int diFree(struct inode *ip) * to be freed by the transaction; */ tid = txBegin(ipimap->i_sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); + mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* acquire tlock of the iag page of the freed ixad * to force the page NOHOMEOK (even though no data is @@ -1294,7 +1294,7 @@ int diFree(struct inode *ip) rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* unlock the AG inode map information */ AG_UNLOCK(imap, agno); @@ -2554,13 +2554,13 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) * addressing structure pointing to the new iag page; */ tid = txBegin(sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); + mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* update the inode map addressing structure to point to it */ if ((rc = xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* Free the blocks allocated for the iag since it was * not successfully added to the inode map */ @@ -2626,7 +2626,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); duplicateIXtree(sb, blkno, xlen, &xaddr); @@ -3074,14 +3074,40 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno, static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; + if (sbi->umask != -1) { + ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); + /* For directories, add x permission if r is allowed by umask */ + if (S_ISDIR(ip->i_mode)) { + if (ip->i_mode & 0400) + ip->i_mode |= 0100; + if (ip->i_mode & 0040) + ip->i_mode |= 0010; + if (ip->i_mode & 0004) + ip->i_mode |= 0001; + } + } ip->i_nlink = le32_to_cpu(dip->di_nlink); - ip->i_uid = le32_to_cpu(dip->di_uid); - ip->i_gid = le32_to_cpu(dip->di_gid); + + jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); + if (sbi->uid == -1) + ip->i_uid = jfs_ip->saved_uid; + else { + ip->i_uid = sbi->uid; + } + + jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); + if (sbi->gid == -1) + ip->i_gid = jfs_ip->saved_gid; + else { + ip->i_gid = sbi->gid; + } + ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); @@ -3132,21 +3158,33 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); dip->di_fileset = cpu_to_le32(jfs_ip->fileset); - dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); + dip->di_inostamp = cpu_to_le32(sbi->inostamp); dip->di_number = cpu_to_le32(ip->i_ino); dip->di_gen = cpu_to_le32(ip->i_generation); dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - dip->di_uid = cpu_to_le32(ip->i_uid); - dip->di_gid = cpu_to_le32(ip->i_gid); + if (sbi->uid == -1) + dip->di_uid = cpu_to_le32(ip->i_uid); + else + dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); + if (sbi->gid == -1) + dip->di_gid = cpu_to_le32(ip->i_gid); + else + dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); /* * mode2 is only needed for storing the higher order bits. * Trust i_mode for the lower order ones */ - dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); + if (sbi->umask == -1) + dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | + ip->i_mode); + else /* Leave the original permissions alone */ + dip->di_mode = cpu_to_le32(jfs_ip->mode2); + dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index 6b59ade..6e24465 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h @@ -140,8 +140,8 @@ struct dinomap { struct inomap { struct dinomap im_imap; /* 4096: inode allocation control */ struct inode *im_ipimap; /* 4: ptr to inode for imap */ - struct semaphore im_freelock; /* 4: iag free list lock */ - struct semaphore im_aglock[MAXAG]; /* 512: per AG locks */ + struct mutex im_freelock; /* 4: iag free list lock */ + struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ u32 *im_DBGdimap; atomic_t im_numinos; /* num of backed inodes */ atomic_t im_numfree; /* num of free backed inodes */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index dc21a5b..54d7371 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -19,6 +19,7 @@ #ifndef _H_JFS_INCORE #define _H_JFS_INCORE +#include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/bitops.h> @@ -37,6 +38,8 @@ struct jfs_inode_info { int fileset; /* fileset number (always 16)*/ uint mode2; /* jfs-specific mode */ + uint saved_uid; /* saved for uid mount option */ + uint saved_gid; /* saved for gid mount option */ pxd_t ixpxd; /* inode extent descriptor */ dxd_t acl; /* dxd describing acl */ dxd_t ea; /* dxd describing ea */ @@ -62,12 +65,12 @@ struct jfs_inode_info { */ struct rw_semaphore rdwrlock; /* - * commit_sem serializes transaction processing on an inode. + * commit_mutex serializes transaction processing on an inode. * It must be taken after beginning a transaction (txBegin), since * dirty inodes may be committed while a new transaction on the * inode is blocked in txBegin or TxBeginAnon */ - struct semaphore commit_sem; + struct mutex commit_mutex; /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ @@ -169,6 +172,9 @@ struct jfs_sb_info { uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ + uint uid; /* uid to override on-disk uid */ + uint gid; /* gid to override on-disk gid */ + uint umask; /* umask to override on-disk umask */ }; /* jfs_sb_info commit_state */ diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 2af5efb..495df40 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -25,6 +25,26 @@ #include "jfs_dinode.h" #include "jfs_debug.h" + +void jfs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = JFS_IP(inode)->mode2; + + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | + S_NOATIME | S_DIRSYNC | S_SYNC); + + if (flags & JFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & JFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & JFS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & JFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; + if (flags & JFS_SYNC_FL) + inode->i_flags |= S_SYNC; +} + /* * NAME: ialloc() * @@ -63,6 +83,13 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode->i_gid = current->fsgid; /* + * New inodes need to save sane values on disk when + * uid & gid mount options are used + */ + jfs_inode->saved_uid = inode->i_uid; + jfs_inode->saved_gid = inode->i_gid; + + /* * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { @@ -74,10 +101,20 @@ struct inode *ialloc(struct inode *parent, umode_t mode) } inode->i_mode = mode; - if (S_ISDIR(mode)) - jfs_inode->mode2 = IDIRECTORY | mode; - else - jfs_inode->mode2 = INLINEEA | ISPARSE | mode; + /* inherit flags from parent */ + jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; + + if (S_ISDIR(mode)) { + jfs_inode->mode2 |= IDIRECTORY; + jfs_inode->mode2 &= ~JFS_DIRSYNC_FL; + } + else { + jfs_inode->mode2 |= INLINEEA | ISPARSE; + if (S_ISLNK(mode)) + jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); + } + jfs_inode->mode2 |= mode; + inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -98,6 +135,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_inode->atlhead = 0; jfs_inode->atltail = 0; jfs_inode->xtlid = 0; + jfs_set_inode_flags(inode); jfs_info("ialloc returns inode = 0x%p\n", inode); diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index b54bac5..095d471 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -20,6 +20,8 @@ extern struct inode *ialloc(struct inode *, umode_t); extern int jfs_fsync(struct file *, struct dentry *, int); +extern int jfs_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); extern void jfs_read_inode(struct inode *); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode*, int); @@ -29,6 +31,7 @@ extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern void jfs_set_inode_flags(struct inode *); extern struct address_space_operations jfs_aops; extern struct inode_operations jfs_dir_inode_operations; diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index 10ad1d0..70ac9f7 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h @@ -20,6 +20,7 @@ #define _H_JFS_LOCK #include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/sched.h> /* diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index d27bac6..0b348b1 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -64,6 +64,7 @@ #include <linux/interrupt.h> #include <linux/smp_lock.h> #include <linux/completion.h> +#include <linux/kthread.h> #include <linux/buffer_head.h> /* for sync_blockdev() */ #include <linux/bio.h> #include <linux/suspend.h> @@ -81,15 +82,14 @@ */ static struct lbuf *log_redrive_list; static DEFINE_SPINLOCK(log_redrive_lock); -DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); /* * log read/write serialization (per log) */ -#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) -#define LOG_LOCK(log) down(&((log)->loglock)) -#define LOG_UNLOCK(log) up(&((log)->loglock)) +#define LOG_LOCK_INIT(log) mutex_init(&(log)->loglock) +#define LOG_LOCK(log) mutex_lock(&((log)->loglock)) +#define LOG_UNLOCK(log) mutex_unlock(&((log)->loglock)) /* @@ -1105,11 +1105,10 @@ int lmLogOpen(struct super_block *sb) } } - if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { + if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) { up(&jfs_log_sem); return -ENOMEM; } - memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); @@ -1181,9 +1180,8 @@ static int open_inline_log(struct super_block *sb) struct jfs_log *log; int rc; - if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) + if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) return -ENOMEM; - memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); @@ -1216,12 +1214,11 @@ static int open_dummy_log(struct super_block *sb) down(&jfs_log_sem); if (!dummy_log) { - dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); + dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL); if (!dummy_log) { up(&jfs_log_sem); return -ENOMEM; } - memset(dummy_log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&dummy_log->sb_list); init_waitqueue_head(&dummy_log->syncwait); dummy_log->no_integrity = 1; @@ -1980,7 +1977,7 @@ static inline void lbmRedrive(struct lbuf *bp) log_redrive_list = bp; spin_unlock_irqrestore(&log_redrive_lock, flags); - wake_up(&jfs_IO_thread_wait); + wake_up_process(jfsIOthread); } @@ -2347,13 +2344,7 @@ int jfsIOWait(void *arg) { struct lbuf *bp; - daemonize("jfsIO"); - - complete(&jfsIOwait); - do { - DECLARE_WAITQUEUE(wq, current); - spin_lock_irq(&log_redrive_lock); while ((bp = log_redrive_list) != 0) { log_redrive_list = bp->l_redrive_next; @@ -2362,21 +2353,19 @@ int jfsIOWait(void *arg) lbmStartIO(bp); spin_lock_irq(&log_redrive_lock); } + spin_unlock_irq(&log_redrive_lock); + if (freezing(current)) { - spin_unlock_irq(&log_redrive_lock); refrigerator(); } else { - add_wait_queue(&jfs_IO_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irq(&log_redrive_lock); schedule(); current->state = TASK_RUNNING; - remove_wait_queue(&jfs_IO_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); jfs_info("jfsIOWait being killed!"); - complete_and_exit(&jfsIOwait, 0); + return 0; } /* diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index e4978b5..8c6909b 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -389,7 +389,7 @@ struct jfs_log { int eor; /* 4: eor of last record in eol page */ struct lbuf *bp; /* 4: current log page buffer */ - struct semaphore loglock; /* 4: log write serialization lock */ + struct mutex loglock; /* 4: log write serialization lock */ /* syncpt */ int nextsync; /* 4: bytes to write before next syncpt */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 8a53981..5fbaeaa 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -104,10 +104,9 @@ static inline int insert_metapage(struct page *page, struct metapage *mp) if (PagePrivate(page)) a = mp_anchor(page); else { - a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS); + a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS); if (!a) return -ENOMEM; - memset(a, 0, sizeof(struct meta_anchor)); set_page_private(page, (unsigned long)a); SetPagePrivate(page); kmap(page); diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index fcf781b..682cf1a 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -113,12 +113,9 @@ extern int jfs_mount(struct super_block *); extern int jfs_mount_rw(struct super_block *, int); extern int jfs_umount(struct super_block *); extern int jfs_umount_rw(struct super_block *); - -extern int jfs_stop_threads; -extern struct completion jfsIOwait; -extern wait_queue_head_t jfs_IO_thread_wait; -extern wait_queue_head_t jfs_commit_thread_wait; -extern wait_queue_head_t jfs_sync_thread_wait; extern int jfs_extendfs(struct super_block *, s64, int); +extern struct task_struct *jfsIOthread; +extern struct task_struct *jfsSyncThread; + #endif /*_H_JFS_SUPERBLOCK */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 2ddb6b8..ac3d669 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -49,6 +49,7 @@ #include <linux/suspend.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/kthread.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -121,8 +122,7 @@ static DEFINE_SPINLOCK(jfsTxnLock); #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) -DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); -DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); +static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); static int jfs_commit_thread_waking; /* @@ -207,7 +207,7 @@ static lid_t txLockAlloc(void) if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { jfs_info("txLockAlloc tlocks low"); jfs_tlocks_low = 1; - wake_up(&jfs_sync_thread_wait); + wake_up_process(jfsSyncThread); } return lid; @@ -2743,10 +2743,6 @@ int jfs_lazycommit(void *arg) unsigned long flags; struct jfs_sb_info *sbi; - daemonize("jfsCommit"); - - complete(&jfsIOwait); - do { LAZY_LOCK(flags); jfs_commit_thread_waking = 0; /* OK to wake another thread */ @@ -2806,13 +2802,13 @@ int jfs_lazycommit(void *arg) current->state = TASK_RUNNING; remove_wait_queue(&jfs_commit_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); if (!list_empty(&TxAnchor.unlock_queue)) jfs_err("jfs_lazycommit being killed w/pending transactions!"); else jfs_info("jfs_lazycommit being killed\n"); - complete_and_exit(&jfsIOwait, 0); + return 0; } void txLazyUnlock(struct tblock * tblk) @@ -2876,10 +2872,10 @@ restart: */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); - down(&jfs_ip->commit_sem); + mutex_lock(&jfs_ip->commit_mutex); txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&jfs_ip->commit_sem); + mutex_unlock(&jfs_ip->commit_mutex); /* * Just to be safe. I don't know how * long we can run without blocking @@ -2932,10 +2928,6 @@ int jfs_sync(void *arg) int rc; tid_t tid; - daemonize("jfsSync"); - - complete(&jfsIOwait); - do { /* * write each inode on the anonymous inode list @@ -2952,7 +2944,7 @@ int jfs_sync(void *arg) * Inode is being freed */ list_del_init(&jfs_ip->anon_inode_list); - } else if (! down_trylock(&jfs_ip->commit_sem)) { + } else if (! !mutex_trylock(&jfs_ip->commit_mutex)) { /* * inode will be removed from anonymous list * when it is committed @@ -2961,7 +2953,7 @@ int jfs_sync(void *arg) tid = txBegin(ip->i_sb, COMMIT_INODE); rc = txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&jfs_ip->commit_sem); + mutex_unlock(&jfs_ip->commit_mutex); iput(ip); /* @@ -2971,7 +2963,7 @@ int jfs_sync(void *arg) cond_resched(); TXN_LOCK(); } else { - /* We can't get the commit semaphore. It may + /* We can't get the commit mutex. It may * be held by a thread waiting for tlock's * so let's not block here. Save it to * put back on the anon_list. @@ -2996,19 +2988,15 @@ int jfs_sync(void *arg) TXN_UNLOCK(); refrigerator(); } else { - DECLARE_WAITQUEUE(wq, current); - - add_wait_queue(&jfs_sync_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); schedule(); current->state = TASK_RUNNING; - remove_wait_queue(&jfs_sync_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); jfs_info("jfs_sync being killed"); - complete_and_exit(&jfsIOwait, 0); + return 0; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4abbe86..309cee5 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -104,8 +104,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_acl(tid, ip, dip); if (rc) @@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -238,8 +238,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_acl(tid, ip, dip); if (rc) @@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -365,8 +365,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); iplist[0] = dip; iplist[1] = ip; @@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) if (rc == -EIO) txAbort(tid, 1); txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); goto out2; } @@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); /* * Truncating the directory index table is not guaranteed. It @@ -488,8 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); iplist[0] = dip; iplist[1] = ip; @@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) if (rc == -EIO) txAbort(tid, 1); /* Marks FS Dirty */ txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); IWRITE_UNLOCK(ip); goto out1; } @@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) if ((new_size = commitZeroLink(tid, ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); IWRITE_UNLOCK(ip); rc = new_size; goto out1; @@ -556,13 +556,13 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); while (new_size && (rc == 0)) { tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); new_size = xtTruncate_pmap(tid, ip, new_size); if (new_size < 0) { txAbort(tid, 1); /* Marks FS Dirty */ @@ -570,7 +570,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) } else rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC); txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); } if (ip->i_nlink == 0) @@ -805,8 +805,8 @@ static int jfs_link(struct dentry *old_dentry, tid = txBegin(ip->i_sb, 0); - down(&JFS_IP(dir)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dir)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* * scan parent directory for entry/freespace @@ -847,8 +847,8 @@ static int jfs_link(struct dentry *old_dentry, out: txEnd(tid); - up(&JFS_IP(dir)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dir)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); jfs_info("jfs_link: rc:%d", rc); return rc; @@ -916,8 +916,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dip)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_security(tid, ip, dip); if (rc) @@ -1037,8 +1037,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(dip)->commit_mutex); + mutex_unlock(&JFS_IP(ip)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -1141,13 +1141,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ tid = txBegin(new_dir->i_sb, 0); - down(&JFS_IP(new_dir)->commit_sem); - down(&JFS_IP(old_ip)->commit_sem); + mutex_lock(&JFS_IP(new_dir)->commit_mutex); + mutex_lock(&JFS_IP(old_ip)->commit_mutex); if (old_dir != new_dir) - down(&JFS_IP(old_dir)->commit_sem); + mutex_lock(&JFS_IP(old_dir)->commit_mutex); if (new_ip) { - down(&JFS_IP(new_ip)->commit_sem); + mutex_lock(&JFS_IP(new_ip)->commit_mutex); /* * Change existing directory entry to new inode number */ @@ -1160,10 +1160,10 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (S_ISDIR(new_ip->i_mode)) { new_ip->i_nlink--; if (new_ip->i_nlink) { - up(&JFS_IP(new_dir)->commit_sem); - up(&JFS_IP(old_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_dir)->commit_mutex); + mutex_unlock(&JFS_IP(old_ip)->commit_mutex); if (old_dir != new_dir) - up(&JFS_IP(old_dir)->commit_sem); + mutex_unlock(&JFS_IP(old_dir)->commit_mutex); if (!S_ISDIR(old_ip->i_mode) && new_ip) IWRITE_UNLOCK(new_ip); jfs_error(new_ip->i_sb, @@ -1282,16 +1282,16 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, out4: txEnd(tid); - up(&JFS_IP(new_dir)->commit_sem); - up(&JFS_IP(old_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_dir)->commit_mutex); + mutex_unlock(&JFS_IP(old_ip)->commit_mutex); if (old_dir != new_dir) - up(&JFS_IP(old_dir)->commit_sem); + mutex_unlock(&JFS_IP(old_dir)->commit_mutex); if (new_ip) - up(&JFS_IP(new_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); while (new_size && (rc == 0)) { tid = txBegin(new_ip->i_sb, 0); - down(&JFS_IP(new_ip)->commit_sem); + mutex_lock(&JFS_IP(new_ip)->commit_mutex); new_size = xtTruncate_pmap(tid, new_ip, new_size); if (new_size < 0) { txAbort(tid, 1); @@ -1299,7 +1299,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); txEnd(tid); - up(&JFS_IP(new_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); } if (new_ip && (new_ip->i_nlink == 0)) set_cflag(COMMIT_Nolink, new_ip); @@ -1361,8 +1361,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, tid = txBegin(dir->i_sb, 0); - down(&JFS_IP(dir)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(dir)->commit_mutex); + mutex_lock(&JFS_IP(ip)->commit_mutex); rc = jfs_init_acl(tid, ip, dir); if (rc) @@ -1407,8 +1407,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, out3: txEnd(tid); - up(&JFS_IP(ip)->commit_sem); - up(&JFS_IP(dir)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dir)->commit_mutex); if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; @@ -1523,6 +1523,7 @@ struct file_operations jfs_dir_operations = { .read = generic_read_dir, .readdir = jfs_readdir, .fsync = jfs_fsync, + .ioctl = jfs_ioctl, }; static int jfs_ci_hash(struct dentry *dir, struct qstr *this) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 8d31f13..18f69e6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -25,6 +25,7 @@ #include <linux/vfs.h> #include <linux/mount.h> #include <linux/moduleparam.h> +#include <linux/kthread.h> #include <linux/posix_acl.h> #include <asm/uaccess.h> #include <linux/seq_file.h> @@ -54,11 +55,9 @@ static int commit_threads = 0; module_param(commit_threads, int, 0); MODULE_PARM_DESC(commit_threads, "Number of commit threads"); -int jfs_stop_threads; -static pid_t jfsIOthread; -static pid_t jfsCommitThread[MAX_COMMIT_THREADS]; -static pid_t jfsSyncThread; -DECLARE_COMPLETION(jfsIOwait); +static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS]; +struct task_struct *jfsIOthread; +struct task_struct *jfsSyncThread; #ifdef CONFIG_JFS_DEBUG int jfsloglevel = JFS_LOGLEVEL_WARN; @@ -195,7 +194,7 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask }; static match_table_t tokens = { @@ -209,6 +208,9 @@ static match_table_t tokens = { {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_umask, "umask=%u"}, {Opt_err, NULL} }; @@ -313,7 +315,29 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, "JFS: quota operations not supported\n"); break; #endif - + case Opt_uid: + { + char *uid = args[0].from; + sbi->uid = simple_strtoul(uid, &uid, 0); + break; + } + case Opt_gid: + { + char *gid = args[0].from; + sbi->gid = simple_strtoul(gid, &gid, 0); + break; + } + case Opt_umask: + { + char *umask = args[0].from; + sbi->umask = simple_strtoul(umask, &umask, 8); + if (sbi->umask & ~0777) { + printk(KERN_ERR + "JFS: Invalid value of umask\n"); + goto cleanup; + } + break; + } default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -396,12 +420,12 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) if (!new_valid_dev(sb->s_bdev->bd_dev)) return -EOVERFLOW; - sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); + sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOSPC; - memset(sbi, 0, sizeof (struct jfs_sb_info)); sb->s_fs_info = sbi; sbi->sb = sb; + sbi->uid = sbi->gid = sbi->umask = -1; /* initialize the mount flag and determine the default error handler */ flag = JFS_ERR_REMOUNT_RO; @@ -564,10 +588,14 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); + if (sbi->uid != -1) + seq_printf(seq, ",uid=%d", sbi->uid); + if (sbi->gid != -1) + seq_printf(seq, ",gid=%d", sbi->gid); + if (sbi->umask != -1) + seq_printf(seq, ",umask=%03o", sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); - else - seq_puts(seq, ",integrity"); #if defined(CONFIG_QUOTA) if (sbi->flag & JFS_USRQUOTA) @@ -617,7 +645,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); INIT_LIST_HEAD(&jfs_ip->anon_inode_list); init_rwsem(&jfs_ip->rdwrlock); - init_MUTEX(&jfs_ip->commit_sem); + mutex_init(&jfs_ip->commit_mutex); init_rwsem(&jfs_ip->xattr_sem); spin_lock_init(&jfs_ip->ag_lock); jfs_ip->active_ag = -1; @@ -661,12 +689,12 @@ static int __init init_jfs_fs(void) /* * I/O completion thread (endio) */ - jfsIOthread = kernel_thread(jfsIOWait, NULL, CLONE_KERNEL); - if (jfsIOthread < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsIOthread); + jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO"); + if (IS_ERR(jfsIOthread)) { + rc = PTR_ERR(jfsIOthread); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto end_txmngr; } - wait_for_completion(&jfsIOwait); /* Wait until thread starts */ if (commit_threads < 1) commit_threads = num_online_cpus(); @@ -674,24 +702,21 @@ static int __init init_jfs_fs(void) commit_threads = MAX_COMMIT_THREADS; for (i = 0; i < commit_threads; i++) { - jfsCommitThread[i] = kernel_thread(jfs_lazycommit, NULL, - CLONE_KERNEL); - if (jfsCommitThread[i] < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", - jfsCommitThread[i]); + jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit"); + if (IS_ERR(jfsCommitThread[i])) { + rc = PTR_ERR(jfsCommitThread[i]); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); commit_threads = i; goto kill_committask; } - /* Wait until thread starts */ - wait_for_completion(&jfsIOwait); } - jfsSyncThread = kernel_thread(jfs_sync, NULL, CLONE_KERNEL); - if (jfsSyncThread < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsSyncThread); + jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync"); + if (IS_ERR(jfsSyncThread)) { + rc = PTR_ERR(jfsSyncThread); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto kill_committask; } - wait_for_completion(&jfsIOwait); /* Wait until thread starts */ #ifdef PROC_FS_JFS jfs_proc_init(); @@ -700,13 +725,9 @@ static int __init init_jfs_fs(void) return register_filesystem(&jfs_fs_type); kill_committask: - jfs_stop_threads = 1; - wake_up_all(&jfs_commit_thread_wait); for (i = 0; i < commit_threads; i++) - wait_for_completion(&jfsIOwait); - - wake_up(&jfs_IO_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait for thread exit */ + kthread_stop(jfsCommitThread[i]); + kthread_stop(jfsIOthread); end_txmngr: txExit(); free_metapage: @@ -722,16 +743,13 @@ static void __exit exit_jfs_fs(void) jfs_info("exit_jfs_fs called"); - jfs_stop_threads = 1; txExit(); metapage_exit(); - wake_up(&jfs_IO_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait until IO thread exits */ - wake_up_all(&jfs_commit_thread_wait); + + kthread_stop(jfsIOthread); for (i = 0; i < commit_threads; i++) - wait_for_completion(&jfsIOwait); - wake_up(&jfs_sync_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait until Sync thread exits */ + kthread_stop(jfsCommitThread[i]); + kthread_stop(jfsSyncThread); #ifdef PROC_FS_JFS jfs_proc_clean(); #endif diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index f23048f..9bc5b7c 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -934,13 +934,13 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, } tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); + mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len, flags); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&ji->commit_sem); + mutex_unlock(&ji->commit_mutex); return rc; } @@ -1093,12 +1093,12 @@ int jfs_removexattr(struct dentry *dentry, const char *name) return rc; tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); + mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&ji->commit_sem); + mutex_unlock(&ji->commit_mutex); return rc; } diff --git a/include/asm-sparc/idprom.h b/include/asm-sparc/idprom.h index d856e64..59083ed 100644 --- a/include/asm-sparc/idprom.h +++ b/include/asm-sparc/idprom.h @@ -7,27 +7,19 @@ #ifndef _SPARC_IDPROM_H #define _SPARC_IDPROM_H -/* Offset into the EEPROM where the id PROM is located on the 4c */ -#define IDPROM_OFFSET 0x7d8 +#include <linux/types.h> -/* On sun4m; physical. */ -/* MicroSPARC(-II) does not decode 31rd bit, but it works. */ -#define IDPROM_OFFSET_M 0xfd8 - -struct idprom -{ - unsigned char id_format; /* Format identifier (always 0x01) */ - unsigned char id_machtype; /* Machine type */ - unsigned char id_ethaddr[6]; /* Hardware ethernet address */ - long id_date; /* Date of manufacture */ - unsigned int id_sernum:24; /* Unique serial number */ - unsigned char id_cksum; /* Checksum - xor of the data bytes */ - unsigned char reserved[16]; +struct idprom { + u8 id_format; /* Format identifier (always 0x01) */ + u8 id_machtype; /* Machine type */ + u8 id_ethaddr[6]; /* Hardware ethernet address */ + s32 id_date; /* Date of manufacture */ + u32 id_sernum:24; /* Unique serial number */ + u8 id_cksum; /* Checksum - xor of the data bytes */ + u8 reserved[16]; }; extern struct idprom *idprom; extern void idprom_init(void); -#define IDPROM_SIZE (sizeof(struct idprom)) - #endif /* !(_SPARC_IDPROM_H) */ diff --git a/include/asm-sparc/oplib.h b/include/asm-sparc/oplib.h index d0d76b3..f283f8a 100644 --- a/include/asm-sparc/oplib.h +++ b/include/asm-sparc/oplib.h @@ -165,6 +165,7 @@ enum prom_input_device { PROMDEV_ITTYA, /* input from ttya */ PROMDEV_ITTYB, /* input from ttyb */ PROMDEV_IRSC, /* input from rsc */ + PROMDEV_IVCONS, /* input from virtual-console */ PROMDEV_I_UNK, }; @@ -177,6 +178,7 @@ enum prom_output_device { PROMDEV_OTTYA, /* to ttya */ PROMDEV_OTTYB, /* to ttyb */ PROMDEV_ORSC, /* to rsc */ + PROMDEV_OVCONS, /* to virtual-console */ PROMDEV_O_UNK, }; diff --git a/include/asm-sparc/uaccess.h b/include/asm-sparc/uaccess.h index f8f1ec1..3cf132e 100644 --- a/include/asm-sparc/uaccess.h +++ b/include/asm-sparc/uaccess.h @@ -120,17 +120,6 @@ case 8: __put_user_asm(x,d,addr,__pu_ret); break; \ default: __pu_ret = __put_user_bad(); break; \ } } else { __pu_ret = -EFAULT; } __pu_ret; }) -#define __put_user_check_ret(x,addr,size,retval) ({ \ -register int __foo __asm__ ("l1"); \ -if (__access_ok(addr,size)) { \ -switch (size) { \ -case 1: __put_user_asm_ret(x,b,addr,retval,__foo); break; \ -case 2: __put_user_asm_ret(x,h,addr,retval,__foo); break; \ -case 4: __put_user_asm_ret(x,,addr,retval,__foo); break; \ -case 8: __put_user_asm_ret(x,d,addr,retval,__foo); break; \ -default: if (__put_user_bad()) return retval; break; \ -} } else return retval; }) - #define __put_user_nocheck(x,addr,size) ({ \ register int __pu_ret; \ switch (size) { \ @@ -141,16 +130,6 @@ case 8: __put_user_asm(x,d,addr,__pu_ret); break; \ default: __pu_ret = __put_user_bad(); break; \ } __pu_ret; }) -#define __put_user_nocheck_ret(x,addr,size,retval) ({ \ -register int __foo __asm__ ("l1"); \ -switch (size) { \ -case 1: __put_user_asm_ret(x,b,addr,retval,__foo); break; \ -case 2: __put_user_asm_ret(x,h,addr,retval,__foo); break; \ -case 4: __put_user_asm_ret(x,,addr,retval,__foo); break; \ -case 8: __put_user_asm_ret(x,d,addr,retval,__foo); break; \ -default: if (__put_user_bad()) return retval; break; \ -} }) - #define __put_user_asm(x,size,addr,ret) \ __asm__ __volatile__( \ "/* Put user asm, inline. */\n" \ @@ -170,32 +149,6 @@ __asm__ __volatile__( \ : "=&r" (ret) : "r" (x), "m" (*__m(addr)), \ "i" (-EFAULT)) -#define __put_user_asm_ret(x,size,addr,ret,foo) \ -if (__builtin_constant_p(ret) && ret == -EFAULT) \ -__asm__ __volatile__( \ - "/* Put user asm ret, inline. */\n" \ -"1:\t" "st"#size " %1, %2\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ - ".align 4\n\t" \ - ".word 1b, __ret_efault\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "m" (*__m(addr))); \ -else \ -__asm__ __volatile( \ - "/* Put user asm ret, inline. */\n" \ -"1:\t" "st"#size " %1, %2\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ -"3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %3, %%o0\n\t" \ - ".previous\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "m" (*__m(addr)), "i" (ret)) - extern int __put_user_bad(void); #define __get_user_check(x,addr,size,type) ({ \ diff --git a/include/asm-sparc64/a.out.h b/include/asm-sparc64/a.out.h index 02af289..35cb5c9 100644 --- a/include/asm-sparc64/a.out.h +++ b/include/asm-sparc64/a.out.h @@ -95,7 +95,11 @@ struct relocation_info /* used when header.a_machtype == M_SPARC */ #ifdef __KERNEL__ -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? 0xf0000000 : 0x80000000000L) +#define STACK_TOP32 ((1UL << 32UL) - PAGE_SIZE) +#define STACK_TOP64 (0x0000080000000000UL - (1UL << 32UL)) + +#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ + STACK_TOP32 : STACK_TOP64) #endif diff --git a/include/asm-sparc64/asi.h b/include/asm-sparc64/asi.h index 5348556..662a211 100644 --- a/include/asm-sparc64/asi.h +++ b/include/asm-sparc64/asi.h @@ -25,14 +25,27 @@ /* SpitFire and later extended ASIs. The "(III)" marker designates * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates - * Chip Multi Threading specific ASIs. + * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific + * ASIs, "(4V)" designates SUN4V specific ASIs. */ #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ +#define ASI_BLK_AIUP_4V 0x16 /* (4V) Prim, user, block ld/st */ +#define ASI_BLK_AIUS_4V 0x17 /* (4V) Sec, user, block ld/st */ #define ASI_PHYS_USE_EC_L 0x1c /* PADDR, E-cachable, little endian*/ #define ASI_PHYS_BYPASS_EC_E_L 0x1d /* PADDR, E-bit, little endian */ +#define ASI_BLK_AIUP_L_4V 0x1e /* (4V) Prim, user, block, l-endian*/ +#define ASI_BLK_AIUS_L_4V 0x1f /* (4V) Sec, user, block, l-endian */ +#define ASI_SCRATCHPAD 0x20 /* (4V) Scratch Pad Registers */ +#define ASI_MMU 0x21 /* (4V) MMU Context Registers */ +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 /* (NG) init-store, twin load, + * secondary, user + */ #define ASI_NUCLEUS_QUAD_LDD 0x24 /* Cachable, qword load */ +#define ASI_QUEUE 0x25 /* (4V) Interrupt Queue Registers */ +#define ASI_QUAD_LDD_PHYS_4V 0x26 /* (4V) Physical, qword load */ #define ASI_NUCLEUS_QUAD_LDD_L 0x2c /* Cachable, qword load, l-endian */ +#define ASI_QUAD_LDD_PHYS_L_4V 0x2e /* (4V) Phys, qword load, l-endian */ #define ASI_PCACHE_DATA_STATUS 0x30 /* (III) PCache data stat RAM diag */ #define ASI_PCACHE_DATA 0x31 /* (III) PCache data RAM diag */ #define ASI_PCACHE_TAG 0x32 /* (III) PCache tag RAM diag */ @@ -137,6 +150,9 @@ #define ASI_FL16_SL 0xdb /* Secondary, 1 16-bit, fpu ld/st,L*/ #define ASI_BLK_COMMIT_P 0xe0 /* Primary, blk store commit */ #define ASI_BLK_COMMIT_S 0xe1 /* Secondary, blk store commit */ +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load, + * primary, implicit + */ #define ASI_BLK_P 0xf0 /* Primary, blk ld/st */ #define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */ #define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */ diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 74de79d..c66a81b 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -1,41 +1,224 @@ /* cpudata.h: Per-cpu parameters. * - * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com) + * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net) */ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H +#include <asm/hypervisor.h> +#include <asm/asi.h> + +#ifndef __ASSEMBLY__ + #include <linux/percpu.h> +#include <linux/threads.h> typedef struct { /* Dcache line 1 */ unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ unsigned int multiplier; unsigned int counter; - unsigned int idle_volume; + unsigned int __pad1; unsigned long clock_tick; /* %tick's per second */ unsigned long udelay_val; - /* Dcache line 2 */ - unsigned int pgcache_size; - unsigned int __pad1; - unsigned long *pte_cache[2]; - unsigned long *pgd_cache; - - /* Dcache line 3, rarely used */ + /* Dcache line 2, rarely used */ unsigned int dcache_size; unsigned int dcache_line_size; unsigned int icache_size; unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - unsigned int __pad2; unsigned int __pad3; + unsigned int __pad4; } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) #define local_cpu_data() __get_cpu_var(__cpu_data) +/* Trap handling code needs to get at a few critical values upon + * trap entry and to process TSB misses. These cannot be in the + * per_cpu() area as we really need to lock them into the TLB and + * thus make them part of the main kernel image. As a result we + * try to make this as small as possible. + * + * This is padded out and aligned to 64-bytes to avoid false sharing + * on SMP. + */ + +/* If you modify the size of this structure, please update + * TRAP_BLOCK_SZ_SHIFT below. + */ +struct thread_info; +struct trap_per_cpu { +/* D-cache line 1: Basic thread information, cpu and device mondo queues */ + struct thread_info *thread; + unsigned long pgd_paddr; + unsigned long cpu_mondo_pa; + unsigned long dev_mondo_pa; + +/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */ + unsigned long resum_mondo_pa; + unsigned long resum_kernel_buf_pa; + unsigned long nonresum_mondo_pa; + unsigned long nonresum_kernel_buf_pa; + +/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */ + struct hv_fault_status fault_info; + +/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */ + unsigned long cpu_mondo_block_pa; + unsigned long cpu_list_pa; + unsigned long __pad1[2]; + +/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */ + unsigned long __pad2[4]; +} __attribute__((aligned(64))); +extern struct trap_per_cpu trap_block[NR_CPUS]; +extern void init_cur_cpu_trap(struct thread_info *); +extern void setup_tba(void); + +struct cpuid_patch_entry { + unsigned int addr; + unsigned int cheetah_safari[4]; + unsigned int cheetah_jbus[4]; + unsigned int starfire[4]; + unsigned int sun4v[4]; +}; +extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; + +struct sun4v_1insn_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch, + __sun4v_1insn_patch_end; + +struct sun4v_2insn_patch_entry { + unsigned int addr; + unsigned int insns[2]; +}; +extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, + __sun4v_2insn_patch_end; + +#endif /* !(__ASSEMBLY__) */ + +#define TRAP_PER_CPU_THREAD 0x00 +#define TRAP_PER_CPU_PGD_PADDR 0x08 +#define TRAP_PER_CPU_CPU_MONDO_PA 0x10 +#define TRAP_PER_CPU_DEV_MONDO_PA 0x18 +#define TRAP_PER_CPU_RESUM_MONDO_PA 0x20 +#define TRAP_PER_CPU_RESUM_KBUF_PA 0x28 +#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30 +#define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38 +#define TRAP_PER_CPU_FAULT_INFO 0x40 +#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0 +#define TRAP_PER_CPU_CPU_LIST_PA 0xc8 + +#define TRAP_BLOCK_SZ_SHIFT 8 + +#include <asm/scratchpad.h> + +#define __GET_CPUID(REG) \ + /* Spitfire implementation (default). */ \ +661: ldxa [%g0] ASI_UPA_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x1f, REG; \ + nop; \ + .section .cpuid_patch, "ax"; \ + /* Instruction location. */ \ + .word 661b; \ + /* Cheetah Safari implementation. */ \ + ldxa [%g0] ASI_SAFARI_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x3ff, REG; \ + nop; \ + /* Cheetah JBUS implementation. */ \ + ldxa [%g0] ASI_JBUS_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x1f, REG; \ + nop; \ + /* Starfire implementation. */ \ + sethi %hi(0x1fff40000d0 >> 9), REG; \ + sllx REG, 9, REG; \ + or REG, 0xd0, REG; \ + lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ + /* sun4v implementation. */ \ + mov SCRATCHPAD_CPUID, REG; \ + ldxa [REG] ASI_SCRATCHPAD, REG; \ + nop; \ + nop; \ + .previous; + +#ifdef CONFIG_SMP + +#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(trap_block), DEST; \ + sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ + or DEST, %lo(trap_block), DEST; \ + add DEST, TMP, DEST; \ + +/* Clobbers TMP, current address space PGD phys address into DEST. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; + +/* Clobbers TMP, loads local processor's IRQ work area into DEST. */ +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(__irq_work), DEST; \ + sllx TMP, 6, TMP; \ + or DEST, %lo(__irq_work), DEST; \ + add DEST, TMP, DEST; + +/* Clobbers TMP, loads DEST with current thread info pointer. */ +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_THREAD], DEST; + +/* Given the current thread info pointer in THR, load the per-cpu + * area base of the current processor into DEST. REG1, REG2, and REG3 are + * clobbered. + * + * You absolutely cannot use DEST as a temporary in this code. The + * reason is that traps can happen during execution, and return from + * trap will load the fully resolved DEST per-cpu base. This can corrupt + * the calculations done by the macro mid-stream. + */ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ + ldub [THR + TI_CPU], REG1; \ + sethi %hi(__per_cpu_shift), REG3; \ + sethi %hi(__per_cpu_base), REG2; \ + ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ + ldx [REG2 + %lo(__per_cpu_base)], REG2; \ + sllx REG1, REG3, REG3; \ + add REG3, REG2, DEST; + +#else + +#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + or DEST, %lo(trap_block), DEST; \ + +/* Uniprocessor versions, we know the cpuid is zero. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; + +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + sethi %hi(__irq_work), DEST; \ + or DEST, %lo(__irq_work), DEST; + +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_THREAD], DEST; + +/* No per-cpu areas on uniprocessor, so no need to load DEST. */ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) + +#endif /* !(CONFIG_SMP) */ + #endif /* _SPARC64_CPUDATA_H */ diff --git a/include/asm-sparc64/elf.h b/include/asm-sparc64/elf.h index 69539a8..303d85e 100644 --- a/include/asm-sparc64/elf.h +++ b/include/asm-sparc64/elf.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/spitfire.h> #endif /* @@ -68,6 +69,7 @@ #define HWCAP_SPARC_MULDIV 8 #define HWCAP_SPARC_V9 16 #define HWCAP_SPARC_ULTRA3 32 +#define HWCAP_SPARC_BLKINIT 64 /* * These are used to set parameters in the core dumps. @@ -145,11 +147,21 @@ typedef struct { instruction set this cpu supports. */ /* On Ultra, we support all of the v8 capabilities. */ -#define ELF_HWCAP ((HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | \ - HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | \ - HWCAP_SPARC_V9) | \ - ((tlb_type == cheetah || tlb_type == cheetah_plus) ? \ - HWCAP_SPARC_ULTRA3 : 0)) +static inline unsigned int sparc64_elf_hwcap(void) +{ + unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | + HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | + HWCAP_SPARC_V9); + + if (tlb_type == cheetah || tlb_type == cheetah_plus) + cap |= HWCAP_SPARC_ULTRA3; + else if (tlb_type == hypervisor) + cap |= HWCAP_SPARC_BLKINIT; + + return cap; +} + +#define ELF_HWCAP sparc64_elf_hwcap(); /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h index 0abd3a6..67960a7 100644 --- a/include/asm-sparc64/head.h +++ b/include/asm-sparc64/head.h @@ -4,12 +4,21 @@ #include <asm/pstate.h> + /* wrpr %g0, val, %gl */ +#define SET_GL(val) \ + .word 0xa1902000 | val + + /* rdpr %gl, %gN */ +#define GET_GL_GLOBAL(N) \ + .word 0x81540000 | (N << 25) + #define KERNBASE 0x400000 #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ) #define __CHEETAH_ID 0x003e0014 #define __JALAPENO_ID 0x003e0016 +#define __SERRANO_ID 0x003e0022 #define CHEETAH_MANUF 0x003e #define CHEETAH_IMPL 0x0014 /* Ultra-III */ @@ -19,6 +28,12 @@ #define PANTHER_IMPL 0x0019 /* Ultra-IV+ */ #define SERRANO_IMPL 0x0022 /* Ultra-IIIi+ */ +#define BRANCH_IF_SUN4V(tmp1,label) \ + sethi %hi(is_sun4v), %tmp1; \ + lduw [%tmp1 + %lo(is_sun4v)], %tmp1; \ + brnz,pn %tmp1, label; \ + nop + #define BRANCH_IF_CHEETAH_BASE(tmp1,tmp2,label) \ rdpr %ver, %tmp1; \ sethi %hi(__CHEETAH_ID), %tmp2; \ diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h new file mode 100644 index 0000000..612bf31 --- /dev/null +++ b/include/asm-sparc64/hypervisor.h @@ -0,0 +1,2128 @@ +#ifndef _SPARC64_HYPERVISOR_H +#define _SPARC64_HYPERVISOR_H + +/* Sun4v hypervisor interfaces and defines. + * + * Hypervisor calls are made via traps to software traps number 0x80 + * and above. Registers %o0 to %o5 serve as argument, status, and + * return value registers. + * + * There are two kinds of these traps. First there are the normal + * "fast traps" which use software trap 0x80 and encode the function + * to invoke by number in register %o5. Argument and return value + * handling is as follows: + * + * ----------------------------------------------- + * | %o5 | function number | undefined | + * | %o0 | argument 0 | return status | + * | %o1 | argument 1 | return value 1 | + * | %o2 | argument 2 | return value 2 | + * | %o3 | argument 3 | return value 3 | + * | %o4 | argument 4 | return value 4 | + * ----------------------------------------------- + * + * The second type are "hyper-fast traps" which encode the function + * number in the software trap number itself. So these use trap + * numbers > 0x80. The register usage for hyper-fast traps is as + * follows: + * + * ----------------------------------------------- + * | %o0 | argument 0 | return status | + * | %o1 | argument 1 | return value 1 | + * | %o2 | argument 2 | return value 2 | + * | %o3 | argument 3 | return value 3 | + * | %o4 | argument 4 | return value 4 | + * ----------------------------------------------- + * + * Registers providing explicit arguments to the hypervisor calls + * are volatile across the call. Upon return their values are + * undefined unless explicitly specified as containing a particular + * return value by the specific call. The return status is always + * returned in register %o0, zero indicates a successful execution of + * the hypervisor call and other values indicate an error status as + * defined below. So, for example, if a hyper-fast trap takes + * arguments 0, 1, and 2, then %o0, %o1, and %o2 are volatile across + * the call and %o3, %o4, and %o5 would be preserved. + * + * If the hypervisor trap is invalid, or the fast trap function number + * is invalid, HV_EBADTRAP will be returned in %o0. Also, all 64-bits + * of the argument and return values are significant. + */ + +/* Trap numbers. */ +#define HV_FAST_TRAP 0x80 +#define HV_MMU_MAP_ADDR_TRAP 0x83 +#define HV_MMU_UNMAP_ADDR_TRAP 0x84 +#define HV_TTRACE_ADDENTRY_TRAP 0x85 +#define HV_CORE_TRAP 0xff + +/* Error codes. */ +#define HV_EOK 0 /* Successful return */ +#define HV_ENOCPU 1 /* Invalid CPU id */ +#define HV_ENORADDR 2 /* Invalid real address */ +#define HV_ENOINTR 3 /* Invalid interrupt id */ +#define HV_EBADPGSZ 4 /* Invalid pagesize encoding */ +#define HV_EBADTSB 5 /* Invalid TSB description */ +#define HV_EINVAL 6 /* Invalid argument */ +#define HV_EBADTRAP 7 /* Invalid function number */ +#define HV_EBADALIGN 8 /* Invalid address alignment */ +#define HV_EWOULDBLOCK 9 /* Cannot complete w/o blocking */ +#define HV_ENOACCESS 10 /* No access to resource */ +#define HV_EIO 11 /* I/O error */ +#define HV_ECPUERROR 12 /* CPU in error state */ +#define HV_ENOTSUPPORTED 13 /* Function not supported */ +#define HV_ENOMAP 14 /* No mapping found */ +#define HV_ETOOMANY 15 /* Too many items specified */ + +/* mach_exit() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_EXIT + * ARG0: exit code + * ERRORS: This service does not return. + * + * Stop all CPUs in the virtual domain and place them into the stopped + * state. The 64-bit exit code may be passed to a service entity as + * the domain's exit status. On systems without a service entity, the + * domain will undergo a reset, and the boot firmware will be + * reloaded. + * + * This function will never return to the guest that invokes it. + * + * Note: By convention an exit code of zero denotes a successful exit by + * the guest code. A non-zero exit code denotes a guest specific + * error indication. + * + */ +#define HV_FAST_MACH_EXIT 0x00 + +/* Domain services. */ + +/* mach_desc() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_DESC + * ARG0: buffer + * ARG1: length + * RET0: status + * RET1: length + * ERRORS: HV_EBADALIGN Buffer is badly aligned + * HV_ENORADDR Buffer is to an illegal real address. + * HV_EINVAL Buffer length is too small for complete + * machine description. + * + * Copy the most current machine description into the buffer indicated + * by the real address in ARG0. The buffer provided must be 16 byte + * aligned. Upon success or HV_EINVAL, this service returns the + * actual size of the machine description in the RET1 return value. + * + * Note: A method of determining the appropriate buffer size for the + * machine description is to first call this service with a buffer + * length of 0 bytes. + */ +#define HV_FAST_MACH_DESC 0x01 + +/* mach_exit() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_SIR + * ERRORS: This service does not return. + * + * Perform a software initiated reset of the virtual machine domain. + * All CPUs are captured as soon as possible, all hardware devices are + * returned to the entry default state, and the domain is restarted at + * the SIR (trap type 0x04) real trap table (RTBA) entry point on one + * of the CPUs. The single CPU restarted is selected as determined by + * platform specific policy. Memory is preserved across this + * operation. + */ +#define HV_FAST_MACH_SIR 0x02 + +/* mach_set_soft_state() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_SET_SOFT_STATE + * ARG0: software state + * ARG1: software state description pointer + * RET0: status + * ERRORS: EINVAL software state not valid or software state + * description is not NULL terminated + * ENORADDR software state description pointer is not a + * valid real address + * EBADALIGNED software state description is not correctly + * aligned + * + * This allows the guest to report it's soft state to the hypervisor. There + * are two primary components to this state. The first part states whether + * the guest software is running or not. The second containts optional + * details specific to the software. + * + * The software state argument is defined below in HV_SOFT_STATE_*, and + * indicates whether the guest is operating normally or in a transitional + * state. + * + * The software state description argument is a real address of a data buffer + * of size 32-bytes aligned on a 32-byte boundary. It is treated as a NULL + * terminated 7-bit ASCII string of up to 31 characters not including the + * NULL termination. + */ +#define HV_FAST_MACH_SET_SOFT_STATE 0x03 +#define HV_SOFT_STATE_NORMAL 0x01 +#define HV_SOFT_STATE_TRANSITION 0x02 + +/* mach_get_soft_state() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MACH_GET_SOFT_STATE + * ARG0: software state description pointer + * RET0: status + * RET1: software state + * ERRORS: ENORADDR software state description pointer is not a + * valid real address + * EBADALIGNED software state description is not correctly + * aligned + * + * Retrieve the current value of the guest's software state. The rules + * for the software state pointer are the same as for mach_set_soft_state() + * above. + */ +#define HV_FAST_MACH_GET_SOFT_STATE 0x04 + +/* CPU services. + * + * CPUs represent devices that can execute software threads. A single + * chip that contains multiple cores or strands is represented as + * multiple CPUs with unique CPU identifiers. CPUs are exported to + * OBP via the machine description (and to the OS via the OBP device + * tree). CPUs are always in one of three states: stopped, running, + * or error. + * + * A CPU ID is a pre-assigned 16-bit value that uniquely identifies a + * CPU within a logical domain. Operations that are to be performed + * on multiple CPUs specify them via a CPU list. A CPU list is an + * array in real memory, of which each 16-bit word is a CPU ID. CPU + * lists are passed through the API as two arguments. The first is + * the number of entries (16-bit words) in the CPU list, and the + * second is the (real address) pointer to the CPU ID list. + */ + +/* cpu_start() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_START + * ARG0: CPU ID + * ARG1: PC + * ARG1: RTBA + * ARG1: target ARG0 + * RET0: status + * ERRORS: ENOCPU Invalid CPU ID + * EINVAL Target CPU ID is not in the stopped state + * ENORADDR Invalid PC or RTBA real address + * EBADALIGN Unaligned PC or unaligned RTBA + * EWOULDBLOCK Starting resources are not available + * + * Start CPU with given CPU ID with PC in %pc and with a real trap + * base address value of RTBA. The indicated CPU must be in the + * stopped state. The supplied RTBA must be aligned on a 256 byte + * boundary. On successful completion, the specified CPU will be in + * the running state and will be supplied with "target ARG0" in %o0 + * and RTBA in %tba. + */ +#define HV_FAST_CPU_START 0x10 + +/* cpu_stop() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_STOP + * ARG0: CPU ID + * RET0: status + * ERRORS: ENOCPU Invalid CPU ID + * EINVAL Target CPU ID is the current cpu + * EINVAL Target CPU ID is not in the running state + * EWOULDBLOCK Stopping resources are not available + * ENOTSUPPORTED Not supported on this platform + * + * The specified CPU is stopped. The indicated CPU must be in the + * running state. On completion, it will be in the stopped state. It + * is not legal to stop the current CPU. + * + * Note: As this service cannot be used to stop the current cpu, this service + * may not be used to stop the last running CPU in a domain. To stop + * and exit a running domain, a guest must use the mach_exit() service. + */ +#define HV_FAST_CPU_STOP 0x11 + +/* cpu_yield() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_YIELD + * RET0: status + * ERRORS: No possible error. + * + * Suspend execution on the current CPU. Execution will resume when + * an interrupt (device, %stick_compare, or cross-call) is targeted to + * the CPU. On some CPUs, this API may be used by the hypervisor to + * save power by disabling hardware strands. + */ +#define HV_FAST_CPU_YIELD 0x12 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_cpu_yield(void); +#endif + +/* cpu_qconf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_QCONF + * ARG0: queue + * ARG1: base real address + * ARG2: number of entries + * RET0: status + * ERRORS: ENORADDR Invalid base real address + * EINVAL Invalid queue or number of entries is less + * than 2 or too large. + * EBADALIGN Base real address is not correctly aligned + * for size. + * + * Configure the given queue to be placed at the given base real + * address, with the given number of entries. The number of entries + * must be a power of 2. The base real address must be aligned + * exactly to match the queue size. Each queue entry is 64 bytes + * long, so for example a 32 entry queue must be aligned on a 2048 + * byte real address boundary. + * + * The specified queue is unconfigured if the number of entries is given + * as zero. + * + * For the current version of this API service, the argument queue is defined + * as follows: + * + * queue description + * ----- ------------------------- + * 0x3c cpu mondo queue + * 0x3d device mondo queue + * 0x3e resumable error queue + * 0x3f non-resumable error queue + * + * Note: The maximum number of entries for each queue for a specific cpu may + * be determined from the machine description. + */ +#define HV_FAST_CPU_QCONF 0x14 +#define HV_CPU_QUEUE_CPU_MONDO 0x3c +#define HV_CPU_QUEUE_DEVICE_MONDO 0x3d +#define HV_CPU_QUEUE_RES_ERROR 0x3e +#define HV_CPU_QUEUE_NONRES_ERROR 0x3f + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_cpu_qconf(unsigned long type, + unsigned long queue_paddr, + unsigned long num_queue_entries); +#endif + +/* cpu_qinfo() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_QINFO + * ARG0: queue + * RET0: status + * RET1: base real address + * RET1: number of entries + * ERRORS: EINVAL Invalid queue + * + * Return the configuration info for the given queue. The base real + * address and number of entries of the defined queue are returned. + * The queue argument values are the same as for cpu_qconf() above. + * + * If the specified queue is a valid queue number, but no queue has + * been defined, the number of entries will be set to zero and the + * base real address returned is undefined. + */ +#define HV_FAST_CPU_QINFO 0x15 + +/* cpu_mondo_send() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_MONDO_SEND + * ARG0-1: CPU list + * ARG2: data real address + * RET0: status + * ERRORS: EBADALIGN Mondo data is not 64-byte aligned or CPU list + * is not 2-byte aligned. + * ENORADDR Invalid data mondo address, or invalid cpu list + * address. + * ENOCPU Invalid cpu in CPU list + * EWOULDBLOCK Some or all of the listed CPUs did not receive + * the mondo + * ECPUERROR One or more of the listed CPUs are in error + * state, use HV_FAST_CPU_STATE to see which ones + * EINVAL CPU list includes caller's CPU ID + * + * Send a mondo interrupt to the CPUs in the given CPU list with the + * 64-bytes at the given data real address. The data must be 64-byte + * aligned. The mondo data will be delivered to the cpu_mondo queues + * of the recipient CPUs. + * + * In all cases, error or not, the CPUs in the CPU list to which the + * mondo has been successfully delivered will be indicated by having + * their entry in CPU list updated with the value 0xffff. + */ +#define HV_FAST_CPU_MONDO_SEND 0x42 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa); +#endif + +/* cpu_myid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_MYID + * RET0: status + * RET1: CPU ID + * ERRORS: No errors defined. + * + * Return the hypervisor ID handle for the current CPU. Use by a + * virtual CPU to discover it's own identity. + */ +#define HV_FAST_CPU_MYID 0x16 + +/* cpu_state() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_STATE + * ARG0: CPU ID + * RET0: status + * RET1: state + * ERRORS: ENOCPU Invalid CPU ID + * + * Retrieve the current state of the CPU with the given CPU ID. + */ +#define HV_FAST_CPU_STATE 0x17 +#define HV_CPU_STATE_STOPPED 0x01 +#define HV_CPU_STATE_RUNNING 0x02 +#define HV_CPU_STATE_ERROR 0x03 + +#ifndef __ASSEMBLY__ +extern long sun4v_cpu_state(unsigned long cpuid); +#endif + +/* cpu_set_rtba() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_SET_RTBA + * ARG0: RTBA + * RET0: status + * RET1: previous RTBA + * ERRORS: ENORADDR Invalid RTBA real address + * EBADALIGN RTBA is incorrectly aligned for a trap table + * + * Set the real trap base address of the local cpu to the given RTBA. + * The supplied RTBA must be aligned on a 256 byte boundary. Upon + * success the previous value of the RTBA is returned in RET1. + * + * Note: This service does not affect %tba + */ +#define HV_FAST_CPU_SET_RTBA 0x18 + +/* cpu_set_rtba() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_GET_RTBA + * RET0: status + * RET1: previous RTBA + * ERRORS: No possible error. + * + * Returns the current value of RTBA in RET1. + */ +#define HV_FAST_CPU_GET_RTBA 0x19 + +/* MMU services. + * + * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls. + */ +#ifndef __ASSEMBLY__ +struct hv_tsb_descr { + unsigned short pgsz_idx; + unsigned short assoc; + unsigned int num_ttes; /* in TTEs */ + unsigned int ctx_idx; + unsigned int pgsz_mask; + unsigned long tsb_base; + unsigned long resv; +}; +#endif +#define HV_TSB_DESCR_PGSZ_IDX_OFFSET 0x00 +#define HV_TSB_DESCR_ASSOC_OFFSET 0x02 +#define HV_TSB_DESCR_NUM_TTES_OFFSET 0x04 +#define HV_TSB_DESCR_CTX_IDX_OFFSET 0x08 +#define HV_TSB_DESCR_PGSZ_MASK_OFFSET 0x0c +#define HV_TSB_DESCR_TSB_BASE_OFFSET 0x10 +#define HV_TSB_DESCR_RESV_OFFSET 0x18 + +/* Page size bitmask. */ +#define HV_PGSZ_MASK_8K (1 << 0) +#define HV_PGSZ_MASK_64K (1 << 1) +#define HV_PGSZ_MASK_512K (1 << 2) +#define HV_PGSZ_MASK_4MB (1 << 3) +#define HV_PGSZ_MASK_32MB (1 << 4) +#define HV_PGSZ_MASK_256MB (1 << 5) +#define HV_PGSZ_MASK_2GB (1 << 6) +#define HV_PGSZ_MASK_16GB (1 << 7) + +/* Page size index. The value given in the TSB descriptor must correspond + * to the smallest page size specified in the pgsz_mask page size bitmask. + */ +#define HV_PGSZ_IDX_8K 0 +#define HV_PGSZ_IDX_64K 1 +#define HV_PGSZ_IDX_512K 2 +#define HV_PGSZ_IDX_4MB 3 +#define HV_PGSZ_IDX_32MB 4 +#define HV_PGSZ_IDX_256MB 5 +#define HV_PGSZ_IDX_2GB 6 +#define HV_PGSZ_IDX_16GB 7 + +/* MMU fault status area. + * + * MMU related faults have their status and fault address information + * placed into a memory region made available by privileged code. Each + * virtual processor must make a mmu_fault_area_conf() call to tell the + * hypervisor where that processor's fault status should be stored. + * + * The fault status block is a multiple of 64-bytes and must be aligned + * on a 64-byte boundary. + */ +#ifndef __ASSEMBLY__ +struct hv_fault_status { + unsigned long i_fault_type; + unsigned long i_fault_addr; + unsigned long i_fault_ctx; + unsigned long i_reserved[5]; + unsigned long d_fault_type; + unsigned long d_fault_addr; + unsigned long d_fault_ctx; + unsigned long d_reserved[5]; +}; +#endif +#define HV_FAULT_I_TYPE_OFFSET 0x00 +#define HV_FAULT_I_ADDR_OFFSET 0x08 +#define HV_FAULT_I_CTX_OFFSET 0x10 +#define HV_FAULT_D_TYPE_OFFSET 0x40 +#define HV_FAULT_D_ADDR_OFFSET 0x48 +#define HV_FAULT_D_CTX_OFFSET 0x50 + +#define HV_FAULT_TYPE_FAST_MISS 1 +#define HV_FAULT_TYPE_FAST_PROT 2 +#define HV_FAULT_TYPE_MMU_MISS 3 +#define HV_FAULT_TYPE_INV_RA 4 +#define HV_FAULT_TYPE_PRIV_VIOL 5 +#define HV_FAULT_TYPE_PROT_VIOL 6 +#define HV_FAULT_TYPE_NFO 7 +#define HV_FAULT_TYPE_NFO_SEFF 8 +#define HV_FAULT_TYPE_INV_VA 9 +#define HV_FAULT_TYPE_INV_ASI 10 +#define HV_FAULT_TYPE_NC_ATOMIC 11 +#define HV_FAULT_TYPE_PRIV_ACT 12 +#define HV_FAULT_TYPE_RESV1 13 +#define HV_FAULT_TYPE_UNALIGNED 14 +#define HV_FAULT_TYPE_INV_PGSZ 15 +/* Values 16 --> -2 are reserved. */ +#define HV_FAULT_TYPE_MULTIPLE -1 + +/* Flags argument for mmu_{map,unmap}_addr(), mmu_demap_{page,context,all}(), + * and mmu_{map,unmap}_perm_addr(). + */ +#define HV_MMU_DMMU 0x01 +#define HV_MMU_IMMU 0x02 +#define HV_MMU_ALL (HV_MMU_DMMU | HV_MMU_IMMU) + +/* mmu_map_addr() + * TRAP: HV_MMU_MAP_ADDR_TRAP + * ARG0: virtual address + * ARG1: mmu context + * ARG2: TTE + * ARG3: flags (HV_MMU_{IMMU,DMMU}) + * ERRORS: EINVAL Invalid virtual address, mmu context, or flags + * EBADPGSZ Invalid page size value + * ENORADDR Invalid real address in TTE + * + * Create a non-permanent mapping using the given TTE, virtual + * address, and mmu context. The flags argument determines which + * (data, or instruction, or both) TLB the mapping gets loaded into. + * + * The behavior is undefined if the valid bit is clear in the TTE. + * + * Note: This API call is for privileged code to specify temporary translation + * mappings without the need to create and manage a TSB. + */ + +/* mmu_unmap_addr() + * TRAP: HV_MMU_UNMAP_ADDR_TRAP + * ARG0: virtual address + * ARG1: mmu context + * ARG2: flags (HV_MMU_{IMMU,DMMU}) + * ERRORS: EINVAL Invalid virtual address, mmu context, or flags + * + * Demaps the given virtual address in the given mmu context on this + * CPU. This function is intended to be used to demap pages mapped + * with mmu_map_addr. This service is equivalent to invoking + * mmu_demap_page() with only the current CPU in the CPU list. The + * flags argument determines which (data, or instruction, or both) TLB + * the mapping gets unmapped from. + * + * Attempting to perform an unmap operation for a previously defined + * permanent mapping will have undefined results. + */ + +/* mmu_tsb_ctx0() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTX0 + * ARG0: number of TSB descriptions + * ARG1: TSB descriptions pointer + * RET0: status + * ERRORS: ENORADDR Invalid TSB descriptions pointer or + * TSB base within a descriptor + * EBADALIGN TSB descriptions pointer is not aligned + * to an 8-byte boundary, or TSB base + * within a descriptor is not aligned for + * the given TSB size + * EBADPGSZ Invalid page size in a TSB descriptor + * EBADTSB Invalid associativity or size in a TSB + * descriptor + * EINVAL Invalid number of TSB descriptions, or + * invalid context index in a TSB + * descriptor, or index page size not + * equal to smallest page size in page + * size bitmask field. + * + * Configures the TSBs for the current CPU for virtual addresses with + * context zero. The TSB descriptions pointer is a pointer to an + * array of the given number of TSB descriptions. + * + * Note: The maximum number of TSBs available to a virtual CPU is given by the + * mmu-max-#tsbs property of the cpu's corresponding "cpu" node in the + * machine description. + */ +#define HV_FAST_MMU_TSB_CTX0 0x20 + +/* mmu_tsb_ctxnon0() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTXNON0 + * ARG0: number of TSB descriptions + * ARG1: TSB descriptions pointer + * RET0: status + * ERRORS: Same as for mmu_tsb_ctx0() above. + * + * Configures the TSBs for the current CPU for virtual addresses with + * non-zero contexts. The TSB descriptions pointer is a pointer to an + * array of the given number of TSB descriptions. + * + * Note: A maximum of 16 TSBs may be specified in the TSB description list. + */ +#define HV_FAST_MMU_TSB_CTXNON0 0x21 + +/* mmu_demap_page() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_DEMAP_PAGE + * ARG0: reserved, must be zero + * ARG1: reserved, must be zero + * ARG2: virtual address + * ARG3: mmu context + * ARG4: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid virutal address, context, or + * flags value + * ENOTSUPPORTED ARG0 or ARG1 is non-zero + * + * Demaps any page mapping of the given virtual address in the given + * mmu context for the current virtual CPU. Any virtually tagged + * caches are guaranteed to be kept consistent. The flags argument + * determines which TLB (instruction, or data, or both) participate in + * the operation. + * + * ARG0 and ARG1 are both reserved and must be set to zero. + */ +#define HV_FAST_MMU_DEMAP_PAGE 0x22 + +/* mmu_demap_ctx() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_DEMAP_CTX + * ARG0: reserved, must be zero + * ARG1: reserved, must be zero + * ARG2: mmu context + * ARG3: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid context or flags value + * ENOTSUPPORTED ARG0 or ARG1 is non-zero + * + * Demaps all non-permanent virtual page mappings previously specified + * for the given context for the current virtual CPU. Any virtual + * tagged caches are guaranteed to be kept consistent. The flags + * argument determines which TLB (instruction, or data, or both) + * participate in the operation. + * + * ARG0 and ARG1 are both reserved and must be set to zero. + */ +#define HV_FAST_MMU_DEMAP_CTX 0x23 + +/* mmu_demap_all() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_DEMAP_ALL + * ARG0: reserved, must be zero + * ARG1: reserved, must be zero + * ARG2: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid flags value + * ENOTSUPPORTED ARG0 or ARG1 is non-zero + * + * Demaps all non-permanent virtual page mappings previously specified + * for the current virtual CPU. Any virtual tagged caches are + * guaranteed to be kept consistent. The flags argument determines + * which TLB (instruction, or data, or both) participate in the + * operation. + * + * ARG0 and ARG1 are both reserved and must be set to zero. + */ +#define HV_FAST_MMU_DEMAP_ALL 0x24 + +/* mmu_map_perm_addr() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_MAP_PERM_ADDR + * ARG0: virtual address + * ARG1: reserved, must be zero + * ARG2: TTE + * ARG3: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid virutal address or flags value + * EBADPGSZ Invalid page size value + * ENORADDR Invalid real address in TTE + * ETOOMANY Too many mappings (max of 8 reached) + * + * Create a permanent mapping using the given TTE and virtual address + * for context 0 on the calling virtual CPU. A maximum of 8 such + * permanent mappings may be specified by privileged code. Mappings + * may be removed with mmu_unmap_perm_addr(). + * + * The behavior is undefined if a TTE with the valid bit clear is given. + * + * Note: This call is used to specify address space mappings for which + * privileged code does not expect to receive misses. For example, + * this mechanism can be used to map kernel nucleus code and data. + */ +#define HV_FAST_MMU_MAP_PERM_ADDR 0x25 + +/* mmu_fault_area_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_FAULT_AREA_CONF + * ARG0: real address + * RET0: status + * RET1: previous mmu fault area real address + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Invalid alignment for fault area + * + * Configure the MMU fault status area for the calling CPU. A 64-byte + * aligned real address specifies where MMU fault status information + * is placed. The return value is the previously specified area, or 0 + * for the first invocation. Specifying a fault area at real address + * 0 is not allowed. + */ +#define HV_FAST_MMU_FAULT_AREA_CONF 0x26 + +/* mmu_enable() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_ENABLE + * ARG0: enable flag + * ARG1: return target address + * RET0: status + * ERRORS: ENORADDR Invalid real address when disabling + * translation. + * EBADALIGN The return target address is not + * aligned to an instruction. + * EINVAL The enable flag request the current + * operating mode (e.g. disable if already + * disabled) + * + * Enable or disable virtual address translation for the calling CPU + * within the virtual machine domain. If the enable flag is zero, + * translation is disabled, any non-zero value will enable + * translation. + * + * When this function returns, the newly selected translation mode + * will be active. If the mmu is being enabled, then the return + * target address is a virtual address else it is a real address. + * + * Upon successful completion, control will be returned to the given + * return target address (ie. the cpu will jump to that address). On + * failure, the previous mmu mode remains and the trap simply returns + * as normal with the appropriate error code in RET0. + */ +#define HV_FAST_MMU_ENABLE 0x27 + +/* mmu_unmap_perm_addr() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_UNMAP_PERM_ADDR + * ARG0: virtual address + * ARG1: reserved, must be zero + * ARG2: flags (HV_MMU_{IMMU,DMMU}) + * RET0: status + * ERRORS: EINVAL Invalid virutal address or flags value + * ENOMAP Specified mapping was not found + * + * Demaps any permanent page mapping (established via + * mmu_map_perm_addr()) at the given virtual address for context 0 on + * the current virtual CPU. Any virtual tagged caches are guaranteed + * to be kept consistent. + */ +#define HV_FAST_MMU_UNMAP_PERM_ADDR 0x28 + +/* mmu_tsb_ctx0_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTX0_INFO + * ARG0: max TSBs + * ARG1: buffer pointer + * RET0: status + * RET1: number of TSBs + * ERRORS: EINVAL Supplied buffer is too small + * EBADALIGN The buffer pointer is badly aligned + * ENORADDR Invalid real address for buffer pointer + * + * Return the TSB configuration as previous defined by mmu_tsb_ctx0() + * into the provided buffer. The size of the buffer is given in ARG1 + * in terms of the number of TSB description entries. + * + * Upon return, RET1 always contains the number of TSB descriptions + * previously configured. If zero TSBs were configured, EOK is + * returned with RET1 containing 0. + */ +#define HV_FAST_MMU_TSB_CTX0_INFO 0x29 + +/* mmu_tsb_ctxnon0_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_TSB_CTXNON0_INFO + * ARG0: max TSBs + * ARG1: buffer pointer + * RET0: status + * RET1: number of TSBs + * ERRORS: EINVAL Supplied buffer is too small + * EBADALIGN The buffer pointer is badly aligned + * ENORADDR Invalid real address for buffer pointer + * + * Return the TSB configuration as previous defined by + * mmu_tsb_ctxnon0() into the provided buffer. The size of the buffer + * is given in ARG1 in terms of the number of TSB description entries. + * + * Upon return, RET1 always contains the number of TSB descriptions + * previously configured. If zero TSBs were configured, EOK is + * returned with RET1 containing 0. + */ +#define HV_FAST_MMU_TSB_CTXNON0_INFO 0x2a + +/* mmu_fault_area_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMU_FAULT_AREA_INFO + * RET0: status + * RET1: fault area real address + * ERRORS: No errors defined. + * + * Return the currently defined MMU fault status area for the current + * CPU. The real address of the fault status area is returned in + * RET1, or 0 is returned in RET1 if no fault status area is defined. + * + * Note: mmu_fault_area_conf() may be called with the return value (RET1) + * from this service if there is a need to save and restore the fault + * area for a cpu. + */ +#define HV_FAST_MMU_FAULT_AREA_INFO 0x2b + +/* Cache and Memory services. */ + +/* mem_scrub() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MEM_SCRUB + * ARG0: real address + * ARG1: length + * RET0: status + * RET1: length scrubbed + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Start address or length are not correctly + * aligned + * EINVAL Length is zero + * + * Zero the memory contents in the range real address to real address + * plus length minus 1. Also, valid ECC will be generated for that + * memory address range. Scrubbing is started at the given real + * address, but may not scrub the entire given length. The actual + * length scrubbed will be returned in RET1. + * + * The real address and length must be aligned on an 8K boundary, or + * contain the start address and length from a sun4v error report. + * + * Note: There are two uses for this function. The first use is to block clear + * and initialize memory and the second is to scrub an u ncorrectable + * error reported via a resumable or non-resumable trap. The second + * use requires the arguments to be equal to the real address and length + * provided in a sun4v memory error report. + */ +#define HV_FAST_MEM_SCRUB 0x31 + +/* mem_sync() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MEM_SYNC + * ARG0: real address + * ARG1: length + * RET0: status + * RET1: length synced + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Start address or length are not correctly + * aligned + * EINVAL Length is zero + * + * Force the next access within the real address to real address plus + * length minus 1 to be fetches from main system memory. Less than + * the given length may be synced, the actual amount synced is + * returned in RET1. The real address and length must be aligned on + * an 8K boundary. + */ +#define HV_FAST_MEM_SYNC 0x32 + +/* Time of day services. + * + * The hypervisor maintains the time of day on a per-domain basis. + * Changing the time of day in one domain does not affect the time of + * day on any other domain. + * + * Time is described by a single unsigned 64-bit word which is the + * number of seconds since the UNIX Epoch (00:00:00 UTC, January 1, + * 1970). + */ + +/* tod_get() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TOD_GET + * RET0: status + * RET1: TOD + * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable + * ENOTSUPPORTED If TOD not supported on this platform + * + * Return the current time of day. May block if TOD access is + * temporarily not possible. + */ +#define HV_FAST_TOD_GET 0x50 + +/* tod_set() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TOD_SET + * ARG0: TOD + * RET0: status + * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable + * ENOTSUPPORTED If TOD not supported on this platform + * + * The current time of day is set to the value specified in ARG0. May + * block if TOD access is temporarily not possible. + */ +#define HV_FAST_TOD_SET 0x51 + +/* Console services */ + +/* con_getchar() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CONS_GETCHAR + * RET0: status + * RET1: character + * ERRORS: EWOULDBLOCK No character available. + * + * Returns a character from the console device. If no character is + * available then an EWOULDBLOCK error is returned. If a character is + * available, then the returned status is EOK and the character value + * is in RET1. + * + * A virtual BREAK is represented by the 64-bit value -1. + * + * A virtual HUP signal is represented by the 64-bit value -2. + */ +#define HV_FAST_CONS_GETCHAR 0x60 + +/* con_putchar() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CONS_PUTCHAR + * ARG0: character + * RET0: status + * ERRORS: EINVAL Illegal character + * EWOULDBLOCK Output buffer currently full, would block + * + * Send a character to the console device. Only character values + * between 0 and 255 may be used. Values outside this range are + * invalid except for the 64-bit value -1 which is used to send a + * virtual BREAK. + */ +#define HV_FAST_CONS_PUTCHAR 0x61 + +/* Trap trace services. + * + * The hypervisor provides a trap tracing capability for privileged + * code running on each virtual CPU. Privileged code provides a + * round-robin trap trace queue within which the hypervisor writes + * 64-byte entries detailing hyperprivileged traps taken n behalf of + * privileged code. This is provided as a debugging capability for + * privileged code. + * + * The trap trace control structure is 64-bytes long and placed at the + * start (offset 0) of the trap trace buffer, and is described as + * follows: + */ +#ifndef __ASSEMBLY__ +struct hv_trap_trace_control { + unsigned long head_offset; + unsigned long tail_offset; + unsigned long __reserved[0x30 / sizeof(unsigned long)]; +}; +#endif +#define HV_TRAP_TRACE_CTRL_HEAD_OFFSET 0x00 +#define HV_TRAP_TRACE_CTRL_TAIL_OFFSET 0x08 + +/* The head offset is the offset of the most recently completed entry + * in the trap-trace buffer. The tail offset is the offset of the + * next entry to be written. The control structure is owned and + * modified by the hypervisor. A guest may not modify the control + * structure contents. Attempts to do so will result in undefined + * behavior for the guest. + * + * Each trap trace buffer entry is layed out as follows: + */ +#ifndef __ASSEMBLY__ +struct hv_trap_trace_entry { + unsigned char type; /* Hypervisor or guest entry? */ + unsigned char hpstate; /* Hyper-privileged state */ + unsigned char tl; /* Trap level */ + unsigned char gl; /* Global register level */ + unsigned short tt; /* Trap type */ + unsigned short tag; /* Extended trap identifier */ + unsigned long tstate; /* Trap state */ + unsigned long tick; /* Tick */ + unsigned long tpc; /* Trap PC */ + unsigned long f1; /* Entry specific */ + unsigned long f2; /* Entry specific */ + unsigned long f3; /* Entry specific */ + unsigned long f4; /* Entry specific */ +}; +#endif +#define HV_TRAP_TRACE_ENTRY_TYPE 0x00 +#define HV_TRAP_TRACE_ENTRY_HPSTATE 0x01 +#define HV_TRAP_TRACE_ENTRY_TL 0x02 +#define HV_TRAP_TRACE_ENTRY_GL 0x03 +#define HV_TRAP_TRACE_ENTRY_TT 0x04 +#define HV_TRAP_TRACE_ENTRY_TAG 0x06 +#define HV_TRAP_TRACE_ENTRY_TSTATE 0x08 +#define HV_TRAP_TRACE_ENTRY_TICK 0x10 +#define HV_TRAP_TRACE_ENTRY_TPC 0x18 +#define HV_TRAP_TRACE_ENTRY_F1 0x20 +#define HV_TRAP_TRACE_ENTRY_F2 0x28 +#define HV_TRAP_TRACE_ENTRY_F3 0x30 +#define HV_TRAP_TRACE_ENTRY_F4 0x38 + +/* The type field is encoded as follows. */ +#define HV_TRAP_TYPE_UNDEF 0x00 /* Entry content undefined */ +#define HV_TRAP_TYPE_HV 0x01 /* Hypervisor trap entry */ +#define HV_TRAP_TYPE_GUEST 0xff /* Added via ttrace_addentry() */ + +/* ttrace_buf_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_BUF_CONF + * ARG0: real address + * ARG1: number of entries + * RET0: status + * RET1: number of entries + * ERRORS: ENORADDR Invalid real address + * EINVAL Size is too small + * EBADALIGN Real address not aligned on 64-byte boundary + * + * Requests hypervisor trap tracing and declares a virtual CPU's trap + * trace buffer to the hypervisor. The real address supplies the real + * base address of the trap trace queue and must be 64-byte aligned. + * Specifying a value of 0 for the number of entries disables trap + * tracing for the calling virtual CPU. The buffer allocated must be + * sized for a power of two number of 64-byte trap trace entries plus + * an initial 64-byte control structure. + * + * This may be invoked any number of times so that a virtual CPU may + * relocate a trap trace buffer or create "snapshots" of information. + * + * If the real address is illegal or badly aligned, then trap tracing + * is disabled and an error is returned. + * + * Upon failure with EINVAL, this service call returns in RET1 the + * minimum number of buffer entries required. Upon other failures + * RET1 is undefined. + */ +#define HV_FAST_TTRACE_BUF_CONF 0x90 + +/* ttrace_buf_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_BUF_INFO + * RET0: status + * RET1: real address + * RET2: size + * ERRORS: None defined. + * + * Returns the size and location of the previously declared trap-trace + * buffer. In the event that no buffer was previously defined, or the + * buffer is disabled, this call will return a size of zero bytes. + */ +#define HV_FAST_TTRACE_BUF_INFO 0x91 + +/* ttrace_enable() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_ENABLE + * ARG0: enable + * RET0: status + * RET1: previous enable state + * ERRORS: EINVAL No trap trace buffer currently defined + * + * Enable or disable trap tracing, and return the previous enabled + * state in RET1. Future systems may define various flags for the + * enable argument (ARG0), for the moment a guest should pass + * "(uint64_t) -1" to enable, and "(uint64_t) 0" to disable all + * tracing - which will ensure future compatability. + */ +#define HV_FAST_TTRACE_ENABLE 0x92 + +/* ttrace_freeze() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_TTRACE_FREEZE + * ARG0: freeze + * RET0: status + * RET1: previous freeze state + * ERRORS: EINVAL No trap trace buffer currently defined + * + * Freeze or unfreeze trap tracing, returning the previous freeze + * state in RET1. A guest should pass a non-zero value to freeze and + * a zero value to unfreeze all tracing. The returned previous state + * is 0 for not frozen and 1 for frozen. + */ +#define HV_FAST_TTRACE_FREEZE 0x93 + +/* ttrace_addentry() + * TRAP: HV_TTRACE_ADDENTRY_TRAP + * ARG0: tag (16-bits) + * ARG1: data word 0 + * ARG2: data word 1 + * ARG3: data word 2 + * ARG4: data word 3 + * RET0: status + * ERRORS: EINVAL No trap trace buffer currently defined + * + * Add an entry to the trap trace buffer. Upon return only ARG0/RET0 + * is modified - none of the other registers holding arguments are + * volatile across this hypervisor service. + */ + +/* Core dump services. + * + * Since the hypervisor viraulizes and thus obscures a lot of the + * physical machine layout and state, traditional OS crash dumps can + * be difficult to diagnose especially when the problem is a + * configuration error of some sort. + * + * The dump services provide an opaque buffer into which the + * hypervisor can place it's internal state in order to assist in + * debugging such situations. The contents are opaque and extremely + * platform and hypervisor implementation specific. The guest, during + * a core dump, requests that the hypervisor update any information in + * the dump buffer in preparation to being dumped as part of the + * domain's memory image. + */ + +/* dump_buf_update() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_DUMP_BUF_UPDATE + * ARG0: real address + * ARG1: size + * RET0: status + * RET1: required size of dump buffer + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Real address is not aligned on a 64-byte + * boundary + * EINVAL Size is non-zero but less than minimum size + * required + * ENOTSUPPORTED Operation not supported on current logical + * domain + * + * Declare a domain dump buffer to the hypervisor. The real address + * provided for the domain dump buffer must be 64-byte aligned. The + * size specifies the size of the dump buffer and may be larger than + * the minimum size specified in the machine description. The + * hypervisor will fill the dump buffer with opaque data. + * + * Note: A guest may elect to include dump buffer contents as part of a crash + * dump to assist with debugging. This function may be called any number + * of times so that a guest may relocate a dump buffer, or create + * "snapshots" of any dump-buffer information. Each call to + * dump_buf_update() atomically declares the new dump buffer to the + * hypervisor. + * + * A specified size of 0 unconfigures the dump buffer. If the real + * address is illegal or badly aligned, then any currently active dump + * buffer is disabled and an error is returned. + * + * In the event that the call fails with EINVAL, RET1 contains the + * minimum size requires by the hypervisor for a valid dump buffer. + */ +#define HV_FAST_DUMP_BUF_UPDATE 0x94 + +/* dump_buf_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_DUMP_BUF_INFO + * RET0: status + * RET1: real address of current dump buffer + * RET2: size of current dump buffer + * ERRORS: No errors defined. + * + * Return the currently configures dump buffer description. A + * returned size of 0 bytes indicates an undefined dump buffer. In + * this case the return address in RET1 is undefined. + */ +#define HV_FAST_DUMP_BUF_INFO 0x95 + +/* Device interrupt services. + * + * Device interrupts are allocated to system bus bridges by the hypervisor, + * and described to OBP in the machine description. OBP then describes + * these interrupts to the OS via properties in the device tree. + * + * Terminology: + * + * cpuid Unique opaque value which represents a target cpu. + * + * devhandle Device handle. It uniquely identifies a device, and + * consistes of the lower 28-bits of the hi-cell of the + * first entry of the device's "reg" property in the + * OBP device tree. + * + * devino Device interrupt number. Specifies the relative + * interrupt number within the device. The unique + * combination of devhandle and devino are used to + * identify a specific device interrupt. + * + * Note: The devino value is the same as the values in the + * "interrupts" property or "interrupt-map" property + * in the OBP device tree for that device. + * + * sysino System interrupt number. A 64-bit unsigned interger + * representing a unique interrupt within a virtual + * machine. + * + * intr_state A flag representing the interrupt state for a given + * sysino. The state values are defined below. + * + * intr_enabled A flag representing the 'enabled' state for a given + * sysino. The enable values are defined below. + */ + +#define HV_INTR_STATE_IDLE 0 /* Nothing pending */ +#define HV_INTR_STATE_RECEIVED 1 /* Interrupt received by hardware */ +#define HV_INTR_STATE_DELIVERED 2 /* Interrupt delivered to queue */ + +#define HV_INTR_DISABLED 0 /* sysino not enabled */ +#define HV_INTR_ENABLED 1 /* sysino enabled */ + +/* intr_devino_to_sysino() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_DEVINO2SYSINO + * ARG0: devhandle + * ARG1: devino + * RET0: status + * RET1: sysino + * ERRORS: EINVAL Invalid devhandle/devino + * + * Converts a device specific interrupt number of the given + * devhandle/devino into a system specific ino (sysino). + */ +#define HV_FAST_INTR_DEVINO2SYSINO 0xa0 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_devino_to_sysino(unsigned long devhandle, + unsigned long devino); +#endif + +/* intr_getenabled() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_GETENABLED + * ARG0: sysino + * RET0: status + * RET1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + * ERRORS: EINVAL Invalid sysino + * + * Returns interrupt enabled state in RET1 for the interrupt defined + * by the given sysino. + */ +#define HV_FAST_INTR_GETENABLED 0xa1 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_getenabled(unsigned long sysino); +#endif + +/* intr_setenabled() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_SETENABLED + * ARG0: sysino + * ARG1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + * RET0: status + * ERRORS: EINVAL Invalid sysino or intr_enabled value + * + * Set the 'enabled' state of the interrupt sysino. + */ +#define HV_FAST_INTR_SETENABLED 0xa2 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_setenabled(unsigned long sysino, unsigned long intr_enabled); +#endif + +/* intr_getstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_GETSTATE + * ARG0: sysino + * RET0: status + * RET1: intr_state (HV_INTR_STATE_*) + * ERRORS: EINVAL Invalid sysino + * + * Returns current state of the interrupt defined by the given sysino. + */ +#define HV_FAST_INTR_GETSTATE 0xa3 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_getstate(unsigned long sysino); +#endif + +/* intr_setstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_SETSTATE + * ARG0: sysino + * ARG1: intr_state (HV_INTR_STATE_*) + * RET0: status + * ERRORS: EINVAL Invalid sysino or intr_state value + * + * Sets the current state of the interrupt described by the given sysino + * value. + * + * Note: Setting the state to HV_INTR_STATE_IDLE clears any pending + * interrupt for sysino. + */ +#define HV_FAST_INTR_SETSTATE 0xa4 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state); +#endif + +/* intr_gettarget() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_GETTARGET + * ARG0: sysino + * RET0: status + * RET1: cpuid + * ERRORS: EINVAL Invalid sysino + * + * Returns CPU that is the current target of the interrupt defined by + * the given sysino. The CPU value returned is undefined if the target + * has not been set via intr_settarget(). + */ +#define HV_FAST_INTR_GETTARGET 0xa5 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_gettarget(unsigned long sysino); +#endif + +/* intr_settarget() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_INTR_SETTARGET + * ARG0: sysino + * ARG1: cpuid + * RET0: status + * ERRORS: EINVAL Invalid sysino + * ENOCPU Invalid cpuid + * + * Set the target CPU for the interrupt defined by the given sysino. + */ +#define HV_FAST_INTR_SETTARGET 0xa6 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid); +#endif + +/* PCI IO services. + * + * See the terminology descriptions in the device interrupt services + * section above as those apply here too. Here are terminology + * definitions specific to these PCI IO services: + * + * tsbnum TSB number. Indentifies which io-tsb is used. + * For this version of the specification, tsbnum + * must be zero. + * + * tsbindex TSB index. Identifies which entry in the TSB + * is used. The first entry is zero. + * + * tsbid A 64-bit aligned data structure which contains + * a tsbnum and a tsbindex. Bits 63:32 contain the + * tsbnum and bits 31:00 contain the tsbindex. + * + * Use the HV_PCI_TSBID() macro to construct such + * values. + * + * io_attributes IO attributes for IOMMU mappings. One of more + * of the attritbute bits are stores in a 64-bit + * value. The values are defined below. + * + * r_addr 64-bit real address + * + * pci_device PCI device address. A PCI device address identifies + * a specific device on a specific PCI bus segment. + * A PCI device address ia a 32-bit unsigned integer + * with the following format: + * + * 00000000.bbbbbbbb.dddddfff.00000000 + * + * Use the HV_PCI_DEVICE_BUILD() macro to construct + * such values. + * + * pci_config_offset + * PCI configureation space offset. For conventional + * PCI a value between 0 and 255. For extended + * configuration space, a value between 0 and 4095. + * + * Note: For PCI configuration space accesses, the offset + * must be aligned to the access size. + * + * error_flag A return value which specifies if the action succeeded + * or failed. 0 means no error, non-0 means some error + * occurred while performing the service. + * + * io_sync_direction + * Direction definition for pci_dma_sync(), defined + * below in HV_PCI_SYNC_*. + * + * io_page_list A list of io_page_addresses, an io_page_address is + * a real address. + * + * io_page_list_p A pointer to an io_page_list. + * + * "size based byte swap" - Some functions do size based byte swapping + * which allows sw to access pointers and + * counters in native form when the processor + * operates in a different endianness than the + * IO bus. Size-based byte swapping converts a + * multi-byte field between big-endian and + * little-endian format. + */ + +#define HV_PCI_MAP_ATTR_READ 0x01 +#define HV_PCI_MAP_ATTR_WRITE 0x02 + +#define HV_PCI_DEVICE_BUILD(b,d,f) \ + ((((b) & 0xff) << 16) | \ + (((d) & 0x1f) << 11) | \ + (((f) & 0x07) << 8)) + +#define HV_PCI_TSBID(__tsb_num, __tsb_index) \ + ((((u64)(__tsb_num)) << 32UL) | ((u64)(__tsb_index))) + +#define HV_PCI_SYNC_FOR_DEVICE 0x01 +#define HV_PCI_SYNC_FOR_CPU 0x02 + +/* pci_iommu_map() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_MAP + * ARG0: devhandle + * ARG1: tsbid + * ARG2: #ttes + * ARG3: io_attributes + * ARG4: io_page_list_p + * RET0: status + * RET1: #ttes mapped + * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex/io_attributes + * EBADALIGN Improperly aligned real address + * ENORADDR Invalid real address + * + * Create IOMMU mappings in the sun4v device defined by the given + * devhandle. The mappings are created in the TSB defined by the + * tsbnum component of the given tsbid. The first mapping is created + * in the TSB i ndex defined by the tsbindex component of the given tsbid. + * The call creates up to #ttes mappings, the first one at tsbnum, tsbindex, + * the second at tsbnum, tsbindex + 1, etc. + * + * All mappings are created with the attributes defined by the io_attributes + * argument. The page mapping addresses are described in the io_page_list + * defined by the given io_page_list_p, which is a pointer to the io_page_list. + * The first entry in the io_page_list is the address for the first iotte, the + * 2nd for the 2nd iotte, and so on. + * + * Each io_page_address in the io_page_list must be appropriately aligned. + * #ttes must be greater than zero. For this version of the spec, the tsbnum + * component of the given tsbid must be zero. + * + * Returns the actual number of mappings creates, which may be less than + * or equal to the argument #ttes. If the function returns a value which + * is less than the #ttes, the caller may continus to call the function with + * an updated tsbid, #ttes, io_page_list_p arguments until all pages are + * mapped. + * + * Note: This function does not imply an iotte cache flush. The guest must + * demap an entry before re-mapping it. + */ +#define HV_FAST_PCI_IOMMU_MAP 0xb0 + +/* pci_iommu_demap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_DEMAP + * ARG0: devhandle + * ARG1: tsbid + * ARG2: #ttes + * RET0: status + * RET1: #ttes demapped + * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex + * + * Demap and flush IOMMU mappings in the device defined by the given + * devhandle. Demaps up to #ttes entries in the TSB defined by the tsbnum + * component of the given tsbid, starting at the TSB index defined by the + * tsbindex component of the given tsbid. + * + * For this version of the spec, the tsbnum of the given tsbid must be zero. + * #ttes must be greater than zero. + * + * Returns the actual number of ttes demapped, which may be less than or equal + * to the argument #ttes. If #ttes demapped is less than #ttes, the caller + * may continue to call this function with updated tsbid and #ttes arguments + * until all pages are demapped. + * + * Note: Entries do not have to be mapped to be demapped. A demap of an + * unmapped page will flush the entry from the tte cache. + */ +#define HV_FAST_PCI_IOMMU_DEMAP 0xb1 + +/* pci_iommu_getmap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_GETMAP + * ARG0: devhandle + * ARG1: tsbid + * RET0: status + * RET1: io_attributes + * RET2: real address + * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex + * ENOMAP Mapping is not valid, no translation exists + * + * Read and return the mapping in the device described by the given devhandle + * and tsbid. If successful, the io_attributes shall be returned in RET1 + * and the page address of the mapping shall be returned in RET2. + * + * For this version of the spec, the tsbnum component of the given tsbid + * must be zero. + */ +#define HV_FAST_PCI_IOMMU_GETMAP 0xb2 + +/* pci_iommu_getbypass() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOMMU_GETBYPASS + * ARG0: devhandle + * ARG1: real address + * ARG2: io_attributes + * RET0: status + * RET1: io_addr + * ERRORS: EINVAL Invalid devhandle/io_attributes + * ENORADDR Invalid real address + * ENOTSUPPORTED Function not supported in this implementation. + * + * Create a "special" mapping in the device described by the given devhandle, + * for the given real address and attributes. Return the IO address in RET1 + * if successful. + */ +#define HV_FAST_PCI_IOMMU_GETBYPASS 0xb3 + +/* pci_config_get() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_CONFIG_GET + * ARG0: devhandle + * ARG1: pci_device + * ARG2: pci_config_offset + * ARG3: size + * RET0: status + * RET1: error_flag + * RET2: data + * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size + * EBADALIGN pci_config_offset not size aligned + * ENOACCESS Access to this offset is not permitted + * + * Read PCI configuration space for the adapter described by the given + * devhandle. Read size (1, 2, or 4) bytes of data from the given + * pci_device, at pci_config_offset from the beginning of the device's + * configuration space. If there was no error, RET1 is set to zero and + * RET2 is set to the data read. Insignificant bits in RET2 are not + * guarenteed to have any specific value and therefore must be ignored. + * + * The data returned in RET2 is size based byte swapped. + * + * If an error occurs during the read, set RET1 to a non-zero value. The + * given pci_config_offset must be 'size' aligned. + */ +#define HV_FAST_PCI_CONFIG_GET 0xb4 + +/* pci_config_put() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_CONFIG_PUT + * ARG0: devhandle + * ARG1: pci_device + * ARG2: pci_config_offset + * ARG3: size + * ARG4: data + * RET0: status + * RET1: error_flag + * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size + * EBADALIGN pci_config_offset not size aligned + * ENOACCESS Access to this offset is not permitted + * + * Write PCI configuration space for the adapter described by the given + * devhandle. Write size (1, 2, or 4) bytes of data in a single operation, + * at pci_config_offset from the beginning of the device's configuration + * space. The data argument contains the data to be written to configuration + * space. Prior to writing, the data is size based byte swapped. + * + * If an error occurs during the write access, do not generate an error + * report, do set RET1 to a non-zero value. Otherwise RET1 is zero. + * The given pci_config_offset must be 'size' aligned. + * + * This function is permitted to read from offset zero in the configuration + * space described by the given pci_device if necessary to ensure that the + * write access to config space completes. + */ +#define HV_FAST_PCI_CONFIG_PUT 0xb5 + +/* pci_peek() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_PEEK + * ARG0: devhandle + * ARG1: real address + * ARG2: size + * RET0: status + * RET1: error_flag + * RET2: data + * ERRORS: EINVAL Invalid devhandle or size + * EBADALIGN Improperly aligned real address + * ENORADDR Bad real address + * ENOACCESS Guest access prohibited + * + * Attempt to read the IO address given by the given devhandle, real address, + * and size. Size must be 1, 2, 4, or 8. The read is performed as a single + * access operation using the given size. If an error occurs when reading + * from the given location, do not generate an error report, but return a + * non-zero value in RET1. If the read was successful, return zero in RET1 + * and return the actual data read in RET2. The data returned is size based + * byte swapped. + * + * Non-significant bits in RET2 are not guarenteed to have any specific value + * and therefore must be ignored. If RET1 is returned as non-zero, the data + * value is not guarenteed to have any specific value and should be ignored. + * + * The caller must have permission to read from the given devhandle, real + * address, which must be an IO address. The argument real address must be a + * size aligned address. + * + * The hypervisor implementation of this function must block access to any + * IO address that the guest does not have explicit permission to access. + */ +#define HV_FAST_PCI_PEEK 0xb6 + +/* pci_poke() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_POKE + * ARG0: devhandle + * ARG1: real address + * ARG2: size + * ARG3: data + * ARG4: pci_device + * RET0: status + * RET1: error_flag + * ERRORS: EINVAL Invalid devhandle, size, or pci_device + * EBADALIGN Improperly aligned real address + * ENORADDR Bad real address + * ENOACCESS Guest access prohibited + * ENOTSUPPORTED Function is not supported by implementation + * + * Attempt to write data to the IO address given by the given devhandle, + * real address, and size. Size must be 1, 2, 4, or 8. The write is + * performed as a single access operation using the given size. Prior to + * writing the data is size based swapped. + * + * If an error occurs when writing to the given location, do not generate an + * error report, but return a non-zero value in RET1. If the write was + * successful, return zero in RET1. + * + * pci_device describes the configuration address of the device being + * written to. The implementation may safely read from offset 0 with + * the configuration space of the device described by devhandle and + * pci_device in order to guarantee that the write portion of the operation + * completes + * + * Any error that occurs due to the read shall be reported using the normal + * error reporting mechanisms .. the read error is not suppressed. + * + * The caller must have permission to write to the given devhandle, real + * address, which must be an IO address. The argument real address must be a + * size aligned address. The caller must have permission to read from + * the given devhandle, pci_device cofiguration space offset 0. + * + * The hypervisor implementation of this function must block access to any + * IO address that the guest does not have explicit permission to access. + */ +#define HV_FAST_PCI_POKE 0xb7 + +/* pci_dma_sync() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_DMA_SYNC + * ARG0: devhandle + * ARG1: real address + * ARG2: size + * ARG3: io_sync_direction + * RET0: status + * RET1: #synced + * ERRORS: EINVAL Invalid devhandle or io_sync_direction + * ENORADDR Bad real address + * + * Synchronize a memory region described by the given real address and size, + * for the device defined by the given devhandle using the direction(s) + * defined by the given io_sync_direction. The argument size is the size of + * the memory region in bytes. + * + * Return the actual number of bytes synchronized in the return value #synced, + * which may be less than or equal to the argument size. If the return + * value #synced is less than size, the caller must continue to call this + * function with updated real address and size arguments until the entire + * memory region is synchronized. + */ +#define HV_FAST_PCI_DMA_SYNC 0xb8 + +/* PCI MSI services. */ + +#define HV_MSITYPE_MSI32 0x00 +#define HV_MSITYPE_MSI64 0x01 + +#define HV_MSIQSTATE_IDLE 0x00 +#define HV_MSIQSTATE_ERROR 0x01 + +#define HV_MSIQ_INVALID 0x00 +#define HV_MSIQ_VALID 0x01 + +#define HV_MSISTATE_IDLE 0x00 +#define HV_MSISTATE_DELIVERED 0x01 + +#define HV_MSIVALID_INVALID 0x00 +#define HV_MSIVALID_VALID 0x01 + +#define HV_PCIE_MSGTYPE_PME_MSG 0x18 +#define HV_PCIE_MSGTYPE_PME_ACK_MSG 0x1b +#define HV_PCIE_MSGTYPE_CORR_MSG 0x30 +#define HV_PCIE_MSGTYPE_NONFATAL_MSG 0x31 +#define HV_PCIE_MSGTYPE_FATAL_MSG 0x33 + +#define HV_MSG_INVALID 0x00 +#define HV_MSG_VALID 0x01 + +/* pci_msiq_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_CONF + * ARG0: devhandle + * ARG1: msiqid + * ARG2: real address + * ARG3: number of entries + * RET0: status + * ERRORS: EINVAL Invalid devhandle, msiqid or nentries + * EBADALIGN Improperly aligned real address + * ENORADDR Bad real address + * + * Configure the MSI queue given by the devhandle and msiqid arguments, + * and to be placed at the given real address and be of the given + * number of entries. The real address must be aligned exactly to match + * the queue size. Each queue entry is 64-bytes long, so f.e. a 32 entry + * queue must be aligned on a 2048 byte real address boundary. The MSI-EQ + * Head and Tail are initialized so that the MSI-EQ is 'empty'. + * + * Implementation Note: Certain implementations have fixed sized queues. In + * that case, number of entries must contain the correct + * value. + */ +#define HV_FAST_PCI_MSIQ_CONF 0xc0 + +/* pci_msiq_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_INFO + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: real address + * RET2: number of entries + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Return the configuration information for the MSI queue described + * by the given devhandle and msiqid. The base address of the queue + * is returned in ARG1 and the number of entries is returned in ARG2. + * If the queue is unconfigured, the real address is undefined and the + * number of entries will be returned as zero. + */ +#define HV_FAST_PCI_MSIQ_INFO 0xc1 + +/* pci_msiq_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID) + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the valid state of the MSI-EQ described by the given devhandle and + * msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETVALID 0xc2 + +/* pci_msiq_setvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_SETVALID + * ARG0: devhandle + * ARG1: msiqid + * ARG2: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID) + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msiqid or msiqvalid + * value or MSI EQ is uninitialized + * + * Set the valid state of the MSI-EQ described by the given devhandle and + * msiqid to the given msiqvalid. + */ +#define HV_FAST_PCI_MSIQ_SETVALID 0xc3 + +/* pci_msiq_getstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETSTATE + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR) + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the state of the MSI-EQ described by the given devhandle and + * msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETSTATE 0xc4 + +/* pci_msiq_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID + * ARG0: devhandle + * ARG1: msiqid + * ARG2: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR) + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msiqid or msiqstate + * value or MSI EQ is uninitialized + * + * Set the state of the MSI-EQ described by the given devhandle and + * msiqid to the given msiqvalid. + */ +#define HV_FAST_PCI_MSIQ_SETSTATE 0xc5 + +/* pci_msiq_gethead() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETHEAD + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqhead + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the current MSI EQ queue head for the MSI-EQ described by the + * given devhandle and msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETHEAD 0xc6 + +/* pci_msiq_sethead() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_SETHEAD + * ARG0: devhandle + * ARG1: msiqid + * ARG2: msiqhead + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msiqid or msiqhead, + * or MSI EQ is uninitialized + * + * Set the current MSI EQ queue head for the MSI-EQ described by the + * given devhandle and msiqid. + */ +#define HV_FAST_PCI_MSIQ_SETHEAD 0xc7 + +/* pci_msiq_gettail() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSIQ_GETTAIL + * ARG0: devhandle + * ARG1: msiqid + * RET0: status + * RET1: msiqtail + * ERRORS: EINVAL Invalid devhandle or msiqid + * + * Get the current MSI EQ queue tail for the MSI-EQ described by the + * given devhandle and msiqid. + */ +#define HV_FAST_PCI_MSIQ_GETTAIL 0xc8 + +/* pci_msi_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_GETVALID + * ARG0: devhandle + * ARG1: msinum + * RET0: status + * RET1: msivalidstate + * ERRORS: EINVAL Invalid devhandle or msinum + * + * Get the current valid/enabled state for the MSI defined by the + * given devhandle and msinum. + */ +#define HV_FAST_PCI_MSI_GETVALID 0xc9 + +/* pci_msi_setvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_SETVALID + * ARG0: devhandle + * ARG1: msinum + * ARG2: msivalidstate + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msinum or msivalidstate + * + * Set the current valid/enabled state for the MSI defined by the + * given devhandle and msinum. + */ +#define HV_FAST_PCI_MSI_SETVALID 0xca + +/* pci_msi_getmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_GETMSIQ + * ARG0: devhandle + * ARG1: msinum + * RET0: status + * RET1: msiqid + * ERRORS: EINVAL Invalid devhandle or msinum or MSI is unbound + * + * Get the MSI EQ that the MSI defined by the given devhandle and + * msinum is bound to. + */ +#define HV_FAST_PCI_MSI_GETMSIQ 0xcb + +/* pci_msi_setmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_SETMSIQ + * ARG0: devhandle + * ARG1: msinum + * ARG2: msitype + * ARG3: msiqid + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msinum or msiqid + * + * Set the MSI EQ that the MSI defined by the given devhandle and + * msinum is bound to. + */ +#define HV_FAST_PCI_MSI_SETMSIQ 0xcc + +/* pci_msi_getstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_GETSTATE + * ARG0: devhandle + * ARG1: msinum + * RET0: status + * RET1: msistate + * ERRORS: EINVAL Invalid devhandle or msinum + * + * Get the state of the MSI defined by the given devhandle and msinum. + * If not initialized, return HV_MSISTATE_IDLE. + */ +#define HV_FAST_PCI_MSI_GETSTATE 0xcd + +/* pci_msi_setstate() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSI_SETSTATE + * ARG0: devhandle + * ARG1: msinum + * ARG2: msistate + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msinum or msistate + * + * Set the state of the MSI defined by the given devhandle and msinum. + */ +#define HV_FAST_PCI_MSI_SETSTATE 0xce + +/* pci_msg_getmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_GETMSIQ + * ARG0: devhandle + * ARG1: msgtype + * RET0: status + * RET1: msiqid + * ERRORS: EINVAL Invalid devhandle or msgtype + * + * Get the MSI EQ of the MSG defined by the given devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_GETMSIQ 0xd0 + +/* pci_msg_setmsiq() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_SETMSIQ + * ARG0: devhandle + * ARG1: msgtype + * ARG2: msiqid + * RET0: status + * ERRORS: EINVAL Invalid devhandle, msgtype, or msiqid + * + * Set the MSI EQ of the MSG defined by the given devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_SETMSIQ 0xd1 + +/* pci_msg_getvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_GETVALID + * ARG0: devhandle + * ARG1: msgtype + * RET0: status + * RET1: msgvalidstate + * ERRORS: EINVAL Invalid devhandle or msgtype + * + * Get the valid/enabled state of the MSG defined by the given + * devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_GETVALID 0xd2 + +/* pci_msg_setvalid() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_MSG_SETVALID + * ARG0: devhandle + * ARG1: msgtype + * ARG2: msgvalidstate + * RET0: status + * ERRORS: EINVAL Invalid devhandle or msgtype or msgvalidstate + * + * Set the valid/enabled state of the MSG defined by the given + * devhandle and msgtype. + */ +#define HV_FAST_PCI_MSG_SETVALID 0xd3 + +/* Performance counter services. */ + +#define HV_PERF_JBUS_PERF_CTRL_REG 0x00 +#define HV_PERF_JBUS_PERF_CNT_REG 0x01 +#define HV_PERF_DRAM_PERF_CTRL_REG_0 0x02 +#define HV_PERF_DRAM_PERF_CNT_REG_0 0x03 +#define HV_PERF_DRAM_PERF_CTRL_REG_1 0x04 +#define HV_PERF_DRAM_PERF_CNT_REG_1 0x05 +#define HV_PERF_DRAM_PERF_CTRL_REG_2 0x06 +#define HV_PERF_DRAM_PERF_CNT_REG_2 0x07 +#define HV_PERF_DRAM_PERF_CTRL_REG_3 0x08 +#define HV_PERF_DRAM_PERF_CNT_REG_3 0x09 + +/* get_perfreg() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_GET_PERFREG + * ARG0: performance reg number + * RET0: status + * RET1: performance reg value + * ERRORS: EINVAL Invalid performance register number + * ENOACCESS No access allowed to performance counters + * + * Read the value of the given DRAM/JBUS performance counter/control register. + */ +#define HV_FAST_GET_PERFREG 0x100 + +/* set_perfreg() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_SET_PERFREG + * ARG0: performance reg number + * ARG1: performance reg value + * RET0: status + * ERRORS: EINVAL Invalid performance register number + * ENOACCESS No access allowed to performance counters + * + * Write the given performance reg value to the given DRAM/JBUS + * performance counter/control register. + */ +#define HV_FAST_SET_PERFREG 0x101 + +/* MMU statistics services. + * + * The hypervisor maintains MMU statistics and privileged code provides + * a buffer where these statistics can be collected. It is continually + * updated once configured. The layout is as follows: + */ +#ifndef __ASSEMBLY__ +struct hv_mmu_statistics { + unsigned long immu_tsb_hits_ctx0_8k_tte; + unsigned long immu_tsb_ticks_ctx0_8k_tte; + unsigned long immu_tsb_hits_ctx0_64k_tte; + unsigned long immu_tsb_ticks_ctx0_64k_tte; + unsigned long __reserved1[2]; + unsigned long immu_tsb_hits_ctx0_4mb_tte; + unsigned long immu_tsb_ticks_ctx0_4mb_tte; + unsigned long __reserved2[2]; + unsigned long immu_tsb_hits_ctx0_256mb_tte; + unsigned long immu_tsb_ticks_ctx0_256mb_tte; + unsigned long __reserved3[4]; + unsigned long immu_tsb_hits_ctxnon0_8k_tte; + unsigned long immu_tsb_ticks_ctxnon0_8k_tte; + unsigned long immu_tsb_hits_ctxnon0_64k_tte; + unsigned long immu_tsb_ticks_ctxnon0_64k_tte; + unsigned long __reserved4[2]; + unsigned long immu_tsb_hits_ctxnon0_4mb_tte; + unsigned long immu_tsb_ticks_ctxnon0_4mb_tte; + unsigned long __reserved5[2]; + unsigned long immu_tsb_hits_ctxnon0_256mb_tte; + unsigned long immu_tsb_ticks_ctxnon0_256mb_tte; + unsigned long __reserved6[4]; + unsigned long dmmu_tsb_hits_ctx0_8k_tte; + unsigned long dmmu_tsb_ticks_ctx0_8k_tte; + unsigned long dmmu_tsb_hits_ctx0_64k_tte; + unsigned long dmmu_tsb_ticks_ctx0_64k_tte; + unsigned long __reserved7[2]; + unsigned long dmmu_tsb_hits_ctx0_4mb_tte; + unsigned long dmmu_tsb_ticks_ctx0_4mb_tte; + unsigned long __reserved8[2]; + unsigned long dmmu_tsb_hits_ctx0_256mb_tte; + unsigned long dmmu_tsb_ticks_ctx0_256mb_tte; + unsigned long __reserved9[4]; + unsigned long dmmu_tsb_hits_ctxnon0_8k_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_8k_tte; + unsigned long dmmu_tsb_hits_ctxnon0_64k_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_64k_tte; + unsigned long __reserved10[2]; + unsigned long dmmu_tsb_hits_ctxnon0_4mb_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_4mb_tte; + unsigned long __reserved11[2]; + unsigned long dmmu_tsb_hits_ctxnon0_256mb_tte; + unsigned long dmmu_tsb_ticks_ctxnon0_256mb_tte; + unsigned long __reserved12[4]; +}; +#endif + +/* mmustat_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMUSTAT_CONF + * ARG0: real address + * RET0: status + * RET1: real address + * ERRORS: ENORADDR Invalid real address + * EBADALIGN Real address not aligned on 64-byte boundary + * EBADTRAP API not supported on this processor + * + * Enable MMU statistic gathering using the buffer at the given real + * address on the current virtual CPU. The new buffer real address + * is given in ARG1, and the previously specified buffer real address + * is returned in RET1, or is returned as zero for the first invocation. + * + * If the passed in real address argument is zero, this will disable + * MMU statistic collection on the current virtual CPU. If an error is + * returned then no statistics are collected. + * + * The buffer contents should be initialized to all zeros before being + * given to the hypervisor or else the statistics will be meaningless. + */ +#define HV_FAST_MMUSTAT_CONF 0x102 + +/* mmustat_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_MMUSTAT_INFO + * RET0: status + * RET1: real address + * ERRORS: EBADTRAP API not supported on this processor + * + * Return the current state and real address of the currently configured + * MMU statistics buffer on the current virtual CPU. + */ +#define HV_FAST_MMUSTAT_INFO 0x103 + +/* Function numbers for HV_CORE_TRAP. */ +#define HV_CORE_VER 0x00 +#define HV_CORE_PUTCHAR 0x01 +#define HV_CORE_EXIT 0x02 + +#endif /* !(_SPARC64_HYPERVISOR_H) */ diff --git a/include/asm-sparc64/idprom.h b/include/asm-sparc64/idprom.h index 701483c..77fbf98 100644 --- a/include/asm-sparc64/idprom.h +++ b/include/asm-sparc64/idprom.h @@ -9,15 +9,7 @@ #include <linux/types.h> -/* Offset into the EEPROM where the id PROM is located on the 4c */ -#define IDPROM_OFFSET 0x7d8 - -/* On sun4m; physical. */ -/* MicroSPARC(-II) does not decode 31rd bit, but it works. */ -#define IDPROM_OFFSET_M 0xfd8 - -struct idprom -{ +struct idprom { u8 id_format; /* Format identifier (always 0x01) */ u8 id_machtype; /* Machine type */ u8 id_ethaddr[6]; /* Hardware ethernet address */ @@ -30,6 +22,4 @@ struct idprom extern struct idprom *idprom; extern void idprom_init(void); -#define IDPROM_SIZE (sizeof(struct idprom)) - #endif /* !(_SPARC_IDPROM_H) */ diff --git a/include/asm-sparc64/intr_queue.h b/include/asm-sparc64/intr_queue.h new file mode 100644 index 0000000..206077d --- /dev/null +++ b/include/asm-sparc64/intr_queue.h @@ -0,0 +1,15 @@ +#ifndef _SPARC64_INTR_QUEUE_H +#define _SPARC64_INTR_QUEUE_H + +/* Sun4v interrupt queue registers, accessed via ASI_QUEUE. */ + +#define INTRQ_CPU_MONDO_HEAD 0x3c0 /* CPU mondo head */ +#define INTRQ_CPU_MONDO_TAIL 0x3c8 /* CPU mondo tail */ +#define INTRQ_DEVICE_MONDO_HEAD 0x3d0 /* Device mondo head */ +#define INTRQ_DEVICE_MONDO_TAIL 0x3d8 /* Device mondo tail */ +#define INTRQ_RESUM_MONDO_HEAD 0x3e0 /* Resumable error mondo head */ +#define INTRQ_RESUM_MONDO_TAIL 0x3e8 /* Resumable error mondo tail */ +#define INTRQ_NONRESUM_MONDO_HEAD 0x3f0 /* Non-resumable error mondo head */ +#define INTRQ_NONRESUM_MONDO_TAIL 0x3f8 /* Non-resumable error mondo head */ + +#endif /* !(_SPARC64_INTR_QUEUE_H) */ diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h index 8b70edc..de33d6e 100644 --- a/include/asm-sparc64/irq.h +++ b/include/asm-sparc64/irq.h @@ -72,8 +72,11 @@ struct ino_bucket { #define IMAP_VALID 0x80000000 /* IRQ Enabled */ #define IMAP_TID_UPA 0x7c000000 /* UPA TargetID */ #define IMAP_TID_JBUS 0x7c000000 /* JBUS TargetID */ +#define IMAP_TID_SHIFT 26 #define IMAP_AID_SAFARI 0x7c000000 /* Safari AgentID */ +#define IMAP_AID_SHIFT 26 #define IMAP_NID_SAFARI 0x03e00000 /* Safari NodeID */ +#define IMAP_NID_SHIFT 21 #define IMAP_IGN 0x000007c0 /* IRQ Group Number */ #define IMAP_INO 0x0000003f /* IRQ Number */ #define IMAP_INR 0x000007ff /* Full interrupt number*/ @@ -111,6 +114,7 @@ extern void disable_irq(unsigned int); #define disable_irq_nosync disable_irq extern void enable_irq(unsigned int); extern unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap); +extern unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags); extern unsigned int sbus_build_irq(void *sbus, unsigned int ino); static __inline__ void set_softint(unsigned long bits) diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h index 8627eed..230ba67 100644 --- a/include/asm-sparc64/mmu.h +++ b/include/asm-sparc64/mmu.h @@ -4,20 +4,9 @@ #include <linux/config.h> #include <asm/page.h> #include <asm/const.h> +#include <asm/hypervisor.h> -/* - * For the 8k pagesize kernel, use only 10 hw context bits to optimize some - * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically - * for vpte offset calculation). For other pagesizes, this optimization in - * the tlbhandlers can not be done; but still, all 13 bits can not be used - * because the tlb handlers use "andcc" instruction which sign extends 13 - * bit arguments. - */ -#if PAGE_SHIFT == 13 -#define CTX_NR_BITS 10 -#else -#define CTX_NR_BITS 12 -#endif +#define CTX_NR_BITS 13 #define TAG_CONTEXT_BITS ((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL)) @@ -90,8 +79,27 @@ #ifndef __ASSEMBLY__ +#define TSB_ENTRY_ALIGNMENT 16 + +struct tsb { + unsigned long tag; + unsigned long pte; +} __attribute__((aligned(TSB_ENTRY_ALIGNMENT))); + +extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte); +extern void tsb_flush(unsigned long ent, unsigned long tag); +extern void tsb_init(struct tsb *tsb, unsigned long size); + typedef struct { - unsigned long sparc64_ctx_val; + spinlock_t lock; + unsigned long sparc64_ctx_val; + struct tsb *tsb; + unsigned long tsb_rss_limit; + unsigned long tsb_nentries; + unsigned long tsb_reg_val; + unsigned long tsb_map_vaddr; + unsigned long tsb_map_pte; + struct hv_tsb_descr tsb_descr; } mm_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 57ee7b3..e797432 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -19,96 +19,98 @@ extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; extern void get_new_mmu_context(struct mm_struct *mm); +#ifdef CONFIG_SMP +extern void smp_new_mmu_context_version(void); +#else +#define smp_new_mmu_context_version() do { } while (0) +#endif + +extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +extern void destroy_context(struct mm_struct *mm); + +extern void __tsb_context_switch(unsigned long pgd_pa, + unsigned long tsb_reg, + unsigned long tsb_vaddr, + unsigned long tsb_pte, + unsigned long tsb_descr_pa); + +static inline void tsb_context_switch(struct mm_struct *mm) +{ + __tsb_context_switch(__pa(mm->pgd), mm->context.tsb_reg_val, + mm->context.tsb_map_vaddr, + mm->context.tsb_map_pte, + __pa(&mm->context.tsb_descr)); +} -/* Initialize a new mmu context. This is invoked when a new - * address space instance (unique or shared) is instantiated. - * This just needs to set mm->context to an invalid context. - */ -#define init_new_context(__tsk, __mm) \ - (((__mm)->context.sparc64_ctx_val = 0UL), 0) - -/* Destroy a dead context. This occurs when mmput drops the - * mm_users count to zero, the mmaps have been released, and - * all the page tables have been flushed. Our job is to destroy - * any remaining processor-specific state, and in the sparc64 - * case this just means freeing up the mmu context ID held by - * this task if valid. - */ -#define destroy_context(__mm) \ -do { spin_lock(&ctx_alloc_lock); \ - if (CTX_VALID((__mm)->context)) { \ - unsigned long nr = CTX_NRBITS((__mm)->context); \ - mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \ - } \ - spin_unlock(&ctx_alloc_lock); \ -} while(0) - -/* Reload the two core values used by TLB miss handler - * processing on sparc64. They are: - * 1) The physical address of mm->pgd, when full page - * table walks are necessary, this is where the - * search begins. - * 2) A "PGD cache". For 32-bit tasks only pgd[0] is - * ever used since that maps the entire low 4GB - * completely. To speed up TLB miss processing we - * make this value available to the handlers. This - * decreases the amount of memory traffic incurred. - */ -#define reload_tlbmiss_state(__tsk, __mm) \ -do { \ - register unsigned long paddr asm("o5"); \ - register unsigned long pgd_cache asm("o4"); \ - paddr = __pa((__mm)->pgd); \ - pgd_cache = 0UL; \ - if (task_thread_info(__tsk)->flags & _TIF_32BIT) \ - pgd_cache = get_pgd_cache((__mm)->pgd); \ - __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \ - "mov %3, %%g4\n\t" \ - "mov %0, %%g7\n\t" \ - "stxa %1, [%%g4] %2\n\t" \ - "membar #Sync\n\t" \ - "wrpr %%g0, 0x096, %%pstate" \ - : /* no outputs */ \ - : "r" (paddr), "r" (pgd_cache),\ - "i" (ASI_DMMU), "i" (TSB_REG)); \ -} while(0) +extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss); +#ifdef CONFIG_SMP +extern void smp_tsb_sync(struct mm_struct *mm); +#else +#define smp_tsb_sync(__mm) do { } while (0) +#endif /* Set MMU context in the actual hardware. */ #define load_secondary_context(__mm) \ - __asm__ __volatile__("stxa %0, [%1] %2\n\t" \ - "flush %%g6" \ - : /* No outputs */ \ - : "r" (CTX_HWBITS((__mm)->context)), \ - "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU)) + __asm__ __volatile__( \ + "\n661: stxa %0, [%1] %2\n" \ + " .section .sun4v_1insn_patch, \"ax\"\n" \ + " .word 661b\n" \ + " stxa %0, [%1] %3\n" \ + " .previous\n" \ + " flush %%g6\n" \ + : /* No outputs */ \ + : "r" (CTX_HWBITS((__mm)->context)), \ + "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU)) extern void __flush_tlb_mm(unsigned long, unsigned long); -/* Switch the current MM context. */ +/* Switch the current MM context. Interrupts are disabled. */ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { - unsigned long ctx_valid; + unsigned long ctx_valid, flags; int cpu; - /* Note: page_table_lock is used here to serialize switch_mm - * and activate_mm, and their calls to get_new_mmu_context. - * This use of page_table_lock is unrelated to its other uses. - */ - spin_lock(&mm->page_table_lock); + spin_lock_irqsave(&mm->context.lock, flags); ctx_valid = CTX_VALID(mm->context); if (!ctx_valid) get_new_mmu_context(mm); - spin_unlock(&mm->page_table_lock); - if (!ctx_valid || (old_mm != mm)) { - load_secondary_context(mm); - reload_tlbmiss_state(tsk, mm); - } + /* We have to be extremely careful here or else we will miss + * a TSB grow if we switch back and forth between a kernel + * thread and an address space which has it's TSB size increased + * on another processor. + * + * It is possible to play some games in order to optimize the + * switch, but the safest thing to do is to unconditionally + * perform the secondary context load and the TSB context switch. + * + * For reference the bad case is, for address space "A": + * + * CPU 0 CPU 1 + * run address space A + * set cpu0's bits in cpu_vm_mask + * switch to kernel thread, borrow + * address space A via entry_lazy_tlb + * run address space A + * set cpu1's bit in cpu_vm_mask + * flush_tlb_pending() + * reset cpu_vm_mask to just cpu1 + * TSB grow + * run address space A + * context was valid, so skip + * TSB context switch + * + * At that point cpu0 continues to use a stale TSB, the one from + * before the TSB grow performed on cpu1. cpu1 did not cross-call + * cpu0 to update it's TSB because at that point the cpu_vm_mask + * only had cpu1 set in it. + */ + load_secondary_context(mm); + tsb_context_switch(mm); - /* Even if (mm == old_mm) we _must_ check - * the cpu_vm_mask. If we do not we could - * corrupt the TLB state because of how - * smp_flush_tlb_{page,range,mm} on sparc64 - * and lazy tlb switches work. -DaveM + /* Any time a processor runs a context on an address space + * for the first time, we must flush that context out of the + * local TLB. */ cpu = smp_processor_id(); if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) { @@ -116,6 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); } + spin_unlock_irqrestore(&mm->context.lock, flags); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -123,23 +126,20 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str /* Activate a new MM instance for the current task. */ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) { + unsigned long flags; int cpu; - /* Note: page_table_lock is used here to serialize switch_mm - * and activate_mm, and their calls to get_new_mmu_context. - * This use of page_table_lock is unrelated to its other uses. - */ - spin_lock(&mm->page_table_lock); + spin_lock_irqsave(&mm->context.lock, flags); if (!CTX_VALID(mm->context)) get_new_mmu_context(mm); cpu = smp_processor_id(); if (!cpu_isset(cpu, mm->cpu_vm_mask)) cpu_set(cpu, mm->cpu_vm_mask); - spin_unlock(&mm->page_table_lock); load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - reload_tlbmiss_state(current, mm); + tsb_context_switch(mm); + spin_unlock_irqrestore(&mm->context.lock, flags); } #endif /* !(__ASSEMBLY__) */ diff --git a/include/asm-sparc64/numnodes.h b/include/asm-sparc64/numnodes.h new file mode 100644 index 0000000..017e7e7 --- /dev/null +++ b/include/asm-sparc64/numnodes.h @@ -0,0 +1,6 @@ +#ifndef _SPARC64_NUMNODES_H +#define _SPARC64_NUMNODES_H + +#define NODES_SHIFT 0 + +#endif /* !(_SPARC64_NUMNODES_H) */ diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index 3c59b26..c754676 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -12,18 +12,8 @@ #include <linux/config.h> #include <asm/openprom.h> -/* Enumeration to describe the prom major version we have detected. */ -enum prom_major_version { - PROM_V0, /* Original sun4c V0 prom */ - PROM_V2, /* sun4c and early sun4m V2 prom */ - PROM_V3, /* sun4m and later, up to sun4d/sun4e machines V3 */ - PROM_P1275, /* IEEE compliant ISA based Sun PROM, only sun4u */ - PROM_AP1000, /* actually no prom at all */ -}; - -extern enum prom_major_version prom_vers; -/* Revision, and firmware revision. */ -extern unsigned int prom_rev, prom_prev; +/* OBP version string. */ +extern char prom_version[]; /* Root node of the prom device tree, this stays constant after * initialization is complete. @@ -39,6 +29,9 @@ extern int prom_stdin, prom_stdout; extern int prom_chosen_node; /* Helper values and strings in arch/sparc64/kernel/head.S */ +extern const char prom_peer_name[]; +extern const char prom_compatible_name[]; +extern const char prom_root_compatible[]; extern const char prom_finddev_name[]; extern const char prom_chosen_path[]; extern const char prom_getprop_name[]; @@ -130,15 +123,6 @@ extern void prom_setcallback(callback_func_t func_ptr); */ extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size); -/* Get the prom major version. */ -extern int prom_version(void); - -/* Get the prom plugin revision. */ -extern int prom_getrev(void); - -/* Get the prom firmware revision. */ -extern int prom_getprev(void); - /* Character operations to/from the console.... */ /* Non-blocking get character from console. */ @@ -164,6 +148,7 @@ enum prom_input_device { PROMDEV_ITTYA, /* input from ttya */ PROMDEV_ITTYB, /* input from ttyb */ PROMDEV_IRSC, /* input from rsc */ + PROMDEV_IVCONS, /* input from virtual-console */ PROMDEV_I_UNK, }; @@ -176,6 +161,7 @@ enum prom_output_device { PROMDEV_OTTYA, /* to ttya */ PROMDEV_OTTYB, /* to ttyb */ PROMDEV_ORSC, /* to rsc */ + PROMDEV_OVCONS, /* to virtual-console */ PROMDEV_O_UNK, }; @@ -183,10 +169,18 @@ extern enum prom_output_device prom_query_output_device(void); /* Multiprocessor operations... */ #ifdef CONFIG_SMP -/* Start the CPU with the given device tree node, context table, and context - * at the passed program counter. +/* Start the CPU with the given device tree node at the passed program + * counter with the given arg passed in via register %o0. + */ +extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg); + +/* Start the CPU with the given cpu ID at the passed program + * counter with the given arg passed in via register %o0. */ -extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0); +extern void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg); + +/* Stop the CPU with the given cpu ID. */ +extern void prom_stopcpu_cpuid(int cpuid); /* Stop the current CPU. */ extern void prom_stopself(void); @@ -335,6 +329,7 @@ int cpu_find_by_mid(int mid, int *prom_node); /* Client interface level routines. */ extern void prom_set_trap_table(unsigned long tba); +extern void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa); extern long p1275_cmd(const char *, long, ...); diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index 5426bb2..fcb2812 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -104,10 +104,12 @@ typedef unsigned long pgprot_t; #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define ARCH_HAS_SETCLEAR_HUGE_PTE #define ARCH_HAS_HUGETLB_PREFAULT_HOOK +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ - (_AC(0x0000000070000000,UL)) : (PAGE_OFFSET)) + (_AC(0x0000000070000000,UL)) : \ + (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) #endif /* !(__ASSEMBLY__) */ @@ -124,17 +126,10 @@ typedef unsigned long pgprot_t; #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) -/* PFNs are real physical page numbers. However, mem_map only begins to record - * per-page information starting at pfn_base. This is to handle systems where - * the first physical page in the machine is at some huge physical address, - * such as 4GB. This is common on a partitioned E10000, for example. - */ -extern struct page *pfn_to_page(unsigned long pfn); -extern unsigned long page_to_pfn(struct page *); +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)>>PAGE_SHIFT) -#define pfn_valid(pfn) (((pfn)-(pfn_base)) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #define virt_to_phys __pa diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index dd35a2c..1396f11 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -139,6 +139,9 @@ struct pci_pbm_info { /* Opaque 32-bit system bus Port ID. */ u32 portid; + /* Opaque 32-bit handle used for hypervisor calls. */ + u32 devhandle; + /* Chipset version information. */ int chip_type; #define PBM_CHIP_TYPE_SABRE 1 diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h index 89bd71b..7c5a589 100644 --- a/include/asm-sparc64/pci.h +++ b/include/asm-sparc64/pci.h @@ -41,10 +41,26 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) struct pci_dev; +struct pci_iommu_ops { + void *(*alloc_consistent)(struct pci_dev *, size_t, dma_addr_t *); + void (*free_consistent)(struct pci_dev *, size_t, void *, dma_addr_t); + dma_addr_t (*map_single)(struct pci_dev *, void *, size_t, int); + void (*unmap_single)(struct pci_dev *, dma_addr_t, size_t, int); + int (*map_sg)(struct pci_dev *, struct scatterlist *, int, int); + void (*unmap_sg)(struct pci_dev *, struct scatterlist *, int, int); + void (*dma_sync_single_for_cpu)(struct pci_dev *, dma_addr_t, size_t, int); + void (*dma_sync_sg_for_cpu)(struct pci_dev *, struct scatterlist *, int, int); +}; + +extern struct pci_iommu_ops *pci_iommu_ops; + /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices. */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); +static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + return pci_iommu_ops->alloc_consistent(hwdev, size, dma_handle); +} /* Free and unmap a consistent DMA buffer. * cpu_addr is what was returned from pci_alloc_consistent, @@ -54,7 +70,10 @@ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t * References to the memory and mappings associated with cpu_addr/dma_addr * past this call are illegal. */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +static inline void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + return pci_iommu_ops->free_consistent(hwdev, size, vaddr, dma_handle); +} /* Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. @@ -62,7 +81,10 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, * Once the device is given the dma address, the device owns this memory * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed. */ -extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction); +static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + return pci_iommu_ops->map_single(hwdev, ptr, size, direction); +} /* Unmap a single streaming mode DMA translation. The dma_addr and size * must match what was provided for in a previous pci_map_single call. All @@ -71,7 +93,10 @@ extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ -extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); +static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) +{ + pci_iommu_ops->unmap_single(hwdev, dma_addr, size, direction); +} /* No highmem on sparc64, plus we have an IOMMU, so mapping pages is easy. */ #define pci_map_page(dev, page, off, size, dir) \ @@ -107,15 +132,19 @@ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction); +static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + return pci_iommu_ops->map_sg(hwdev, sg, nents, direction); +} /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nhwents, int direction); +static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction) +{ + pci_iommu_ops->unmap_sg(hwdev, sg, nhwents, direction); +} /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. @@ -127,8 +156,10 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, * must first perform a pci_dma_sync_for_device, and then the * device again owns the buffer. */ -extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction); +static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +{ + pci_iommu_ops->dma_sync_single_for_cpu(hwdev, dma_handle, size, direction); +} static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, @@ -144,7 +175,10 @@ pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, * The same as pci_dma_sync_single_* but for a scatter-gather list, * same rules and usage. */ -extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); +static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + pci_iommu_ops->dma_sync_sg_for_cpu(hwdev, sg, nelems, direction); +} static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index a96067c..12e4a27 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> #include <asm/spitfire.h> #include <asm/cpudata.h> @@ -13,172 +14,59 @@ #include <asm/page.h> /* Page table allocation/freeing. */ -#ifdef CONFIG_SMP -/* Sliiiicck */ -#define pgt_quicklists local_cpu_data() -#else -extern struct pgtable_cache_struct { - unsigned long *pgd_cache; - unsigned long *pte_cache[2]; - unsigned int pgcache_size; -} pgt_quicklists; -#endif -#define pgd_quicklist (pgt_quicklists.pgd_cache) -#define pmd_quicklist ((unsigned long *)0) -#define pte_quicklist (pgt_quicklists.pte_cache) -#define pgtable_cache_size (pgt_quicklists.pgcache_size) +extern kmem_cache_t *pgtable_cache; -static __inline__ void free_pgd_fast(pgd_t *pgd) +static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - preempt_disable(); - *(unsigned long *)pgd = (unsigned long) pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; - preempt_enable(); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } -static __inline__ pgd_t *get_pgd_fast(void) +static inline void pgd_free(pgd_t *pgd) { - unsigned long *ret; - - preempt_disable(); - if((ret = pgd_quicklist) != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - preempt_enable(); - } else { - preempt_enable(); - ret = (unsigned long *) __get_free_page(GFP_KERNEL|__GFP_REPEAT); - if(ret) - memset(ret, 0, PAGE_SIZE); - } - return (pgd_t *)ret; + kmem_cache_free(pgtable_cache, pgd); } -static __inline__ void free_pgd_slow(pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -#ifdef DCACHE_ALIASING_POSSIBLE -#define VPTE_COLOR(address) (((address) >> (PAGE_SHIFT + 10)) & 1UL) -#define DCACHE_COLOR(address) (((address) >> PAGE_SHIFT) & 1UL) -#else -#define VPTE_COLOR(address) 0 -#define DCACHE_COLOR(address) 0 -#endif - #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static __inline__ pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address) +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - unsigned long *ret; - int color = 0; - - preempt_disable(); - if (pte_quicklist[color] == NULL) - color = 1; - - if((ret = (unsigned long *)pte_quicklist[color]) != NULL) { - pte_quicklist[color] = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } - preempt_enable(); - - return (pmd_t *)ret; + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } -static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +static inline void pmd_free(pmd_t *pmd) { - pmd_t *pmd; - - pmd = pmd_alloc_one_fast(mm, address); - if (!pmd) { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - if (pmd) - memset(pmd, 0, PAGE_SIZE); - } - return pmd; + kmem_cache_free(pgtable_cache, pmd); } -static __inline__ void free_pmd_fast(pmd_t *pmd) +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) { - unsigned long color = DCACHE_COLOR((unsigned long)pmd); - - preempt_disable(); - *(unsigned long *)pmd = (unsigned long) pte_quicklist[color]; - pte_quicklist[color] = (unsigned long *) pmd; - pgtable_cache_size++; - preempt_enable(); + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } -static __inline__ void free_pmd_slow(pmd_t *pmd) +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) { - free_page((unsigned long)pmd); + return virt_to_page(pte_alloc_one_kernel(mm, address)); } - -#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) -#define pmd_populate(MM,PMD,PTE_PAGE) \ - pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) - -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address); - -static inline struct page * -pte_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - pte_t *pte = pte_alloc_one_kernel(mm, addr); - - if (pte) - return virt_to_page(pte); - - return NULL; -} - -static __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -{ - unsigned long color = VPTE_COLOR(address); - unsigned long *ret; - - preempt_disable(); - if((ret = (unsigned long *)pte_quicklist[color]) != NULL) { - pte_quicklist[color] = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } - preempt_enable(); - return (pte_t *)ret; -} - -static __inline__ void free_pte_fast(pte_t *pte) -{ - unsigned long color = DCACHE_COLOR((unsigned long)pte); - - preempt_disable(); - *(unsigned long *)pte = (unsigned long) pte_quicklist[color]; - pte_quicklist[color] = (unsigned long *) pte; - pgtable_cache_size++; - preempt_enable(); -} - -static __inline__ void free_pte_slow(pte_t *pte) -{ - free_page((unsigned long)pte); -} - + static inline void pte_free_kernel(pte_t *pte) { - free_pte_fast(pte); + kmem_cache_free(pgtable_cache, pte); } static inline void pte_free(struct page *ptepage) { - free_pte_fast(page_address(ptepage)); + pte_free_kernel(page_address(ptepage)); } -#define pmd_free(pmd) free_pmd_fast(pmd) -#define pgd_free(pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() + +#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) +#define pmd_populate(MM,PMD,PTE_PAGE) \ + pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) + +#define check_pgt_cache() do { } while (0) #endif /* _SPARC64_PGALLOC_H */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index f0a9b44..ed4124e 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -25,7 +25,8 @@ #include <asm/const.h> /* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB). - * The page copy blockops can use 0x2000000 to 0x10000000. + * The page copy blockops can use 0x2000000 to 0x4000000. + * The TSB is mapped in the 0x4000000 to 0x6000000 range. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. * The vmalloc area spans 0x100000000 to 0x200000000. * Since modules need to be in the lowest 32-bits of the address space, @@ -34,6 +35,7 @@ * 0x400000000. */ #define TLBTEMP_BASE _AC(0x0000000002000000,UL) +#define TSBMAP_BASE _AC(0x0000000004000000,UL) #define MODULES_VADDR _AC(0x0000000010000000,UL) #define MODULES_LEN _AC(0x00000000e0000000,UL) #define MODULES_END _AC(0x00000000f0000000,UL) @@ -88,162 +90,538 @@ #endif /* !(__ASSEMBLY__) */ -/* Spitfire/Cheetah TTE bits. */ -#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ -#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit up to date*/ -#define _PAGE_SZ4MB _AC(0x6000000000000000,UL) /* 4MB Page */ -#define _PAGE_SZ512K _AC(0x4000000000000000,UL) /* 512K Page */ -#define _PAGE_SZ64K _AC(0x2000000000000000,UL) /* 64K Page */ -#define _PAGE_SZ8K _AC(0x0000000000000000,UL) /* 8K Page */ -#define _PAGE_NFO _AC(0x1000000000000000,UL) /* No Fault Only */ -#define _PAGE_IE _AC(0x0800000000000000,UL) /* Invert Endianness */ -#define _PAGE_SOFT2 _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ -#define _PAGE_RES1 _AC(0x0002000000000000,UL) /* Reserved */ -#define _PAGE_SZ32MB _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ -#define _PAGE_SZ256MB _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ -#define _PAGE_SN _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ -#define _PAGE_RES2 _AC(0x0000780000000000,UL) /* Reserved */ -#define _PAGE_PADDR_SF _AC(0x000001FFFFFFE000,UL) /* (Spitfire) paddr[40:13]*/ -#define _PAGE_PADDR _AC(0x000007FFFFFFE000,UL) /* (Cheetah) paddr[42:13] */ -#define _PAGE_SOFT _AC(0x0000000000001F80,UL) /* Software bits */ -#define _PAGE_L _AC(0x0000000000000040,UL) /* Locked TTE */ -#define _PAGE_CP _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */ -#define _PAGE_CV _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */ -#define _PAGE_E _AC(0x0000000000000008,UL) /* side-Effect */ -#define _PAGE_P _AC(0x0000000000000004,UL) /* Privileged Page */ -#define _PAGE_W _AC(0x0000000000000002,UL) /* Writable */ -#define _PAGE_G _AC(0x0000000000000001,UL) /* Global */ - -/* Here are the SpitFire software bits we use in the TTE's. - * - * WARNING: If you are going to try and start using some - * of the soft2 bits, you will need to make - * modifications to the swap entry implementation. - * For example, one thing that could happen is that - * swp_entry_to_pte() would BUG_ON() if you tried - * to use one of the soft2 bits for _PAGE_FILE. - * - * Like other architectures, I have aliased _PAGE_FILE with - * _PAGE_MODIFIED. This works because _PAGE_FILE is never - * interpreted that way unless _PAGE_PRESENT is clear. - */ -#define _PAGE_EXEC _AC(0x0000000000001000,UL) /* Executable SW bit */ -#define _PAGE_MODIFIED _AC(0x0000000000000800,UL) /* Modified (dirty) */ -#define _PAGE_FILE _AC(0x0000000000000800,UL) /* Pagecache page */ -#define _PAGE_ACCESSED _AC(0x0000000000000400,UL) /* Accessed (ref'd) */ -#define _PAGE_READ _AC(0x0000000000000200,UL) /* Readable SW Bit */ -#define _PAGE_WRITE _AC(0x0000000000000100,UL) /* Writable SW Bit */ -#define _PAGE_PRESENT _AC(0x0000000000000080,UL) /* Present */ +/* PTE bits which are the same in SUN4U and SUN4V format. */ +#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ +#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ + +/* SUN4U pte bits... */ +#define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */ +#define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */ +#define _PAGE_SZ64K_4U _AC(0x2000000000000000,UL) /* 64K Page */ +#define _PAGE_SZ8K_4U _AC(0x0000000000000000,UL) /* 8K Page */ +#define _PAGE_NFO_4U _AC(0x1000000000000000,UL) /* No Fault Only */ +#define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */ +#define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ +#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ +#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ +#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ +#define _PAGE_SN_4U _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */ +#define _PAGE_RES2_4U _AC(0x0000780000000000,UL) /* Reserved */ +#define _PAGE_PADDR_4U _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13] */ +#define _PAGE_SOFT_4U _AC(0x0000000000001F80,UL) /* Software bits: */ +#define _PAGE_EXEC_4U _AC(0x0000000000001000,UL) /* Executable SW bit */ +#define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty) */ +#define _PAGE_FILE_4U _AC(0x0000000000000800,UL) /* Pagecache page */ +#define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd) */ +#define _PAGE_READ_4U _AC(0x0000000000000200,UL) /* Readable SW Bit */ +#define _PAGE_WRITE_4U _AC(0x0000000000000100,UL) /* Writable SW Bit */ +#define _PAGE_PRESENT_4U _AC(0x0000000000000080,UL) /* Present */ +#define _PAGE_L_4U _AC(0x0000000000000040,UL) /* Locked TTE */ +#define _PAGE_CP_4U _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */ +#define _PAGE_CV_4U _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */ +#define _PAGE_E_4U _AC(0x0000000000000008,UL) /* side-Effect */ +#define _PAGE_P_4U _AC(0x0000000000000004,UL) /* Privileged Page */ +#define _PAGE_W_4U _AC(0x0000000000000002,UL) /* Writable */ + +/* SUN4V pte bits... */ +#define _PAGE_NFO_4V _AC(0x4000000000000000,UL) /* No Fault Only */ +#define _PAGE_SOFT2_4V _AC(0x3F00000000000000,UL) /* Software bits, set 2 */ +#define _PAGE_MODIFIED_4V _AC(0x2000000000000000,UL) /* Modified (dirty) */ +#define _PAGE_ACCESSED_4V _AC(0x1000000000000000,UL) /* Accessed (ref'd) */ +#define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */ +#define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */ +#define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */ +#define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */ +#define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */ +#define _PAGE_CP_4V _AC(0x0000000000000400,UL) /* Cacheable in P-Cache */ +#define _PAGE_CV_4V _AC(0x0000000000000200,UL) /* Cacheable in V-Cache */ +#define _PAGE_P_4V _AC(0x0000000000000100,UL) /* Privileged Page */ +#define _PAGE_EXEC_4V _AC(0x0000000000000080,UL) /* Executable Page */ +#define _PAGE_W_4V _AC(0x0000000000000040,UL) /* Writable */ +#define _PAGE_SOFT_4V _AC(0x0000000000000030,UL) /* Software bits */ +#define _PAGE_FILE_4V _AC(0x0000000000000020,UL) /* Pagecache page */ +#define _PAGE_PRESENT_4V _AC(0x0000000000000010,UL) /* Present */ +#define _PAGE_RESV_4V _AC(0x0000000000000008,UL) /* Reserved */ +#define _PAGE_SZ16GB_4V _AC(0x0000000000000007,UL) /* 16GB Page */ +#define _PAGE_SZ2GB_4V _AC(0x0000000000000006,UL) /* 2GB Page */ +#define _PAGE_SZ256MB_4V _AC(0x0000000000000005,UL) /* 256MB Page */ +#define _PAGE_SZ32MB_4V _AC(0x0000000000000004,UL) /* 32MB Page */ +#define _PAGE_SZ4MB_4V _AC(0x0000000000000003,UL) /* 4MB Page */ +#define _PAGE_SZ512K_4V _AC(0x0000000000000002,UL) /* 512K Page */ +#define _PAGE_SZ64K_4V _AC(0x0000000000000001,UL) /* 64K Page */ +#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */ #if PAGE_SHIFT == 13 -#define _PAGE_SZBITS _PAGE_SZ8K +#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V #elif PAGE_SHIFT == 16 -#define _PAGE_SZBITS _PAGE_SZ64K +#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V #elif PAGE_SHIFT == 19 -#define _PAGE_SZBITS _PAGE_SZ512K +#define _PAGE_SZBITS_4U _PAGE_SZ512K_4U +#define _PAGE_SZBITS_4V _PAGE_SZ512K_4V #elif PAGE_SHIFT == 22 -#define _PAGE_SZBITS _PAGE_SZ4MB +#define _PAGE_SZBITS_4U _PAGE_SZ4MB_4U +#define _PAGE_SZBITS_4V _PAGE_SZ4MB_4V #else #error Wrong PAGE_SHIFT specified #endif #if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) -#define _PAGE_SZHUGE _PAGE_SZ4MB +#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -#define _PAGE_SZHUGE _PAGE_SZ512K +#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -#define _PAGE_SZHUGE _PAGE_SZ64K +#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U +#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V #endif -#define _PAGE_CACHE (_PAGE_CP | _PAGE_CV) +/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */ +#define __P000 __pgprot(0) +#define __P001 __pgprot(0) +#define __P010 __pgprot(0) +#define __P011 __pgprot(0) +#define __P100 __pgprot(0) +#define __P101 __pgprot(0) +#define __P110 __pgprot(0) +#define __P111 __pgprot(0) + +#define __S000 __pgprot(0) +#define __S001 __pgprot(0) +#define __S010 __pgprot(0) +#define __S011 __pgprot(0) +#define __S100 __pgprot(0) +#define __S101 __pgprot(0) +#define __S110 __pgprot(0) +#define __S111 __pgprot(0) -#define __DIRTY_BITS (_PAGE_MODIFIED | _PAGE_WRITE | _PAGE_W) -#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_READ | _PAGE_R) -#define __PRIV_BITS _PAGE_P +#ifndef __ASSEMBLY__ -#define PAGE_NONE __pgprot (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_CACHE) +extern pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long); -/* Don't set the TTE _PAGE_W bit here, else the dirty bit never gets set. */ -#define PAGE_SHARED __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_WRITE | _PAGE_EXEC) +extern unsigned long pte_sz_bits(unsigned long size); -#define PAGE_COPY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_EXEC) +extern pgprot_t PAGE_KERNEL; +extern pgprot_t PAGE_KERNEL_LOCKED; +extern pgprot_t PAGE_COPY; +extern pgprot_t PAGE_SHARED; -#define PAGE_READONLY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_EXEC) +/* XXX This uglyness is for the atyfb driver's sparc mmap() support. XXX */ +extern unsigned long _PAGE_IE; +extern unsigned long _PAGE_E; +extern unsigned long _PAGE_CACHE; -#define PAGE_KERNEL __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \ - __PRIV_BITS | \ - __ACCESS_BITS | __DIRTY_BITS | _PAGE_EXEC) +extern unsigned long pg_iobits; +extern unsigned long _PAGE_ALL_SZ_BITS; +extern unsigned long _PAGE_SZBITS; -#define PAGE_SHARED_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | \ - __ACCESS_BITS | _PAGE_WRITE) +extern struct page *mem_map_zero; +#define ZERO_PAGE(vaddr) (mem_map_zero) -#define PAGE_COPY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | __ACCESS_BITS) +/* PFNs are real physical page numbers. However, mem_map only begins to record + * per-page information starting at pfn_base. This is to handle systems where + * the first physical page in the machine is at some huge physical address, + * such as 4GB. This is common on a partitioned E10000, for example. + */ +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) +{ + unsigned long paddr = pfn << PAGE_SHIFT; + unsigned long sz_bits; + + sz_bits = 0UL; + if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) { + __asm__ __volatile__( + "\n661: sethi %uhi(%1), %0\n" + " sllx %0, 32, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " mov %2, %0\n" + " nop\n" + " .previous\n" + : "=r" (sz_bits) + : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V)); + } + return __pte(paddr | sz_bits | pgprot_val(prot)); +} +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -#define PAGE_READONLY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \ - _PAGE_CACHE | __ACCESS_BITS) +/* This one can be done with two shifts. */ +static inline unsigned long pte_pfn(pte_t pte) +{ + unsigned long ret; + + __asm__ __volatile__( + "\n661: sllx %1, %2, %0\n" + " srlx %0, %3, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sllx %1, %4, %0\n" + " srlx %0, %5, %0\n" + " .previous\n" + : "=r" (ret) + : "r" (pte_val(pte)), + "i" (21), "i" (21 + PAGE_SHIFT), + "i" (8), "i" (8 + PAGE_SHIFT)); + + return ret; +} +#define pte_page(x) pfn_to_page(pte_pfn(x)) -#define _PFN_MASK _PAGE_PADDR +static inline pte_t pte_modify(pte_t pte, pgprot_t prot) +{ + unsigned long mask, tmp; + + /* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347) + * SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8) + * + * Even if we use negation tricks the result is still a 6 + * instruction sequence, so don't try to play fancy and just + * do the most straightforward implementation. + * + * Note: We encode this into 3 sun4v 2-insn patch sequences. + */ -#define pg_iobits (_PAGE_VALID | _PAGE_PRESENT | __DIRTY_BITS | \ - __ACCESS_BITS | _PAGE_E) + __asm__ __volatile__( + "\n661: sethi %%uhi(%2), %1\n" + " sethi %%hi(%2), %0\n" + "\n662: or %1, %%ulo(%2), %1\n" + " or %0, %%lo(%2), %0\n" + "\n663: sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%3), %1\n" + " sethi %%hi(%3), %0\n" + " .word 662b\n" + " or %1, %%ulo(%3), %1\n" + " or %0, %%lo(%3), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" + : "=r" (mask), "=r" (tmp) + : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | + _PAGE_SZBITS_4U), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | + _PAGE_SZBITS_4V)); + + return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); +} -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY_NOEXEC -#define __P010 PAGE_COPY_NOEXEC -#define __P011 PAGE_COPY_NOEXEC -#define __P100 PAGE_READONLY -#define __P101 PAGE_READONLY -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY +static inline pte_t pgoff_to_pte(unsigned long off) +{ + off <<= PAGE_SHIFT; + + __asm__ __volatile__( + "\n661: or %0, %2, %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " or %0, %3, %0\n" + " .previous\n" + : "=r" (off) + : "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V)); + + return __pte(off); +} -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY_NOEXEC -#define __S010 PAGE_SHARED_NOEXEC -#define __S011 PAGE_SHARED_NOEXEC -#define __S100 PAGE_READONLY -#define __S101 PAGE_READONLY -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED +static inline pgprot_t pgprot_noncached(pgprot_t prot) +{ + unsigned long val = pgprot_val(prot); + + __asm__ __volatile__( + "\n661: andn %0, %2, %0\n" + " or %0, %3, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " andn %0, %4, %0\n" + " or %0, %3, %0\n" + " .previous\n" + : "=r" (val) + : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + + return __pgprot(val); +} +/* Various pieces of code check for platform support by ifdef testing + * on "pgprot_noncached". That's broken and should be fixed, but for + * now... + */ +#define pgprot_noncached pgprot_noncached -#ifndef __ASSEMBLY__ +#ifdef CONFIG_HUGETLB_PAGE +static inline pte_t pte_mkhuge(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: sethi %%uhi(%1), %0\n" + " sllx %0, 32, %0\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " mov %2, %0\n" + " nop\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V)); + + return __pte(pte_val(pte) | mask); +} +#endif -extern unsigned long phys_base; -extern unsigned long pfn_base; +static inline pte_t pte_mkdirty(pte_t pte) +{ + unsigned long val = pte_val(pte), tmp; + + __asm__ __volatile__( + "\n661: or %0, %3, %0\n" + " nop\n" + "\n662: nop\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sllx %1, 32, %1\n" + " .word 662b\n" + " or %1, %%lo(%4), %1\n" + " or %0, %1, %0\n" + " .previous\n" + : "=r" (val), "=r" (tmp) + : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U), + "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V)); + + return __pte(val); +} -extern struct page *mem_map_zero; -#define ZERO_PAGE(vaddr) (mem_map_zero) +static inline pte_t pte_mkclean(pte_t pte) +{ + unsigned long val = pte_val(pte), tmp; + + __asm__ __volatile__( + "\n661: andn %0, %3, %0\n" + " nop\n" + "\n662: nop\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sllx %1, 32, %1\n" + " .word 662b\n" + " or %1, %%lo(%4), %1\n" + " andn %0, %1, %0\n" + " .previous\n" + : "=r" (val), "=r" (tmp) + : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U), + "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V)); + + return __pte(val); +} -/* PFNs are real physical page numbers. However, mem_map only begins to record - * per-page information starting at pfn_base. This is to handle systems where - * the first physical page in the machine is at some huge physical address, - * such as 4GB. This is common on a partitioned E10000, for example. - */ +static inline pte_t pte_mkwrite(pte_t pte) +{ + unsigned long val = pte_val(pte), mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V)); + + return __pte(val | mask); +} -#define pfn_pte(pfn, prot) \ - __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot) | _PAGE_SZBITS) -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +static inline pte_t pte_wrprotect(pte_t pte) +{ + unsigned long val = pte_val(pte), tmp; + + __asm__ __volatile__( + "\n661: andn %0, %3, %0\n" + " nop\n" + "\n662: nop\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sllx %1, 32, %1\n" + " .word 662b\n" + " or %1, %%lo(%4), %1\n" + " andn %0, %1, %0\n" + " .previous\n" + : "=r" (val), "=r" (tmp) + : "0" (val), "i" (_PAGE_WRITE_4U | _PAGE_W_4U), + "i" (_PAGE_WRITE_4V | _PAGE_W_4V)); + + return __pte(val); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V)); + + mask |= _PAGE_R; + + return __pte(pte_val(pte) & ~mask); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V)); + + mask |= _PAGE_R; + + return __pte(pte_val(pte) | mask); +} -#define pte_pfn(x) ((pte_val(x) & _PAGE_PADDR)>>PAGE_SHIFT) -#define pte_page(x) pfn_to_page(pte_pfn(x)) +static inline unsigned long pte_young(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V)); + + return (pte_val(pte) & mask); +} + +static inline unsigned long pte_dirty(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_MODIFIED_4U), "i" (_PAGE_MODIFIED_4V)); + + return (pte_val(pte) & mask); +} -static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) +static inline unsigned long pte_write(pte_t pte) { - pte_t __pte; - const unsigned long preserve_mask = (_PFN_MASK | - _PAGE_MODIFIED | _PAGE_ACCESSED | - _PAGE_CACHE | _PAGE_E | - _PAGE_PRESENT | _PAGE_SZBITS); + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V)); + + return (pte_val(pte) & mask); +} - pte_val(__pte) = (pte_val(orig_pte) & preserve_mask) | - (pgprot_val(new_prot) & ~preserve_mask); +static inline unsigned long pte_exec(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: sethi %%hi(%1), %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " mov %2, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_EXEC_4U), "i" (_PAGE_EXEC_4V)); + + return (pte_val(pte) & mask); +} - return __pte; +static inline unsigned long pte_read(pte_t pte) +{ + unsigned long mask; + + __asm__ __volatile__( + "\n661: mov %1, %0\n" + " nop\n" + " .section .sun4v_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%2), %0\n" + " sllx %0, 32, %0\n" + " .previous\n" + : "=r" (mask) + : "i" (_PAGE_READ_4U), "i" (_PAGE_READ_4V)); + + return (pte_val(pte) & mask); } + +static inline unsigned long pte_file(pte_t pte) +{ + unsigned long val = pte_val(pte); + + __asm__ __volatile__( + "\n661: and %0, %2, %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " and %0, %3, %0\n" + " .previous\n" + : "=r" (val) + : "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V)); + + return val; +} + +static inline unsigned long pte_present(pte_t pte) +{ + unsigned long val = pte_val(pte); + + __asm__ __volatile__( + "\n661: and %0, %2, %0\n" + " .section .sun4v_1insn_patch, \"ax\"\n" + " .word 661b\n" + " and %0, %3, %0\n" + " .previous\n" + : "=r" (val) + : "0" (val), "i" (_PAGE_PRESENT_4U), "i" (_PAGE_PRESENT_4V)); + + return val; +} + #define pmd_set(pmdp, ptep) \ (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) #define pud_set(pudp, pmdp) \ @@ -253,8 +631,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page(pud) \ ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (0) #define pmd_present(pmd) (pmd_val(pmd) != 0U) @@ -264,30 +640,8 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0U) -/* The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -#define pte_read(pte) (pte_val(pte) & _PAGE_READ) -#define pte_exec(pte) (pte_val(pte) & _PAGE_EXEC) -#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) -#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~(_PAGE_WRITE|_PAGE_W))) -#define pte_rdprotect(pte) \ - (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_READ)) -#define pte_mkclean(pte) \ - (__pte(pte_val(pte) & ~(_PAGE_MODIFIED|_PAGE_W))) -#define pte_mkold(pte) \ - (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_ACCESSED)) - -/* Permanent address of a page. */ -#define __page_address(page) page_address(page) - -/* Be very careful when you change these three, they are delicate. */ -#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) -#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE)) -#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W)) -#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE)) +/* Same in both SUN4V and SUN4U. */ +#define pte_none(pte) (!pte_val(pte)) /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -296,11 +650,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -/* extract the pgd cache used for optimizing the tlb miss - * slow path when executing 32-bit compat processes - */ -#define get_pgd_cache(pgd) ((unsigned long) pgd_val(*pgd) << 11) - /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ ((pmd_t *) pud_page(*(pudp)) + \ @@ -327,6 +676,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p /* It is more efficient to let flush_tlb_kernel_range() * handle init_mm tlb flushes. + * + * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U + * and SUN4V pte layout, so this inline test is fine. */ if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID)) tlb_batch_add(mm, addr, ptep, orig); @@ -361,42 +713,23 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) /* File offset in PTE support. */ -#define pte_file(pte) (pte_val(pte) & _PAGE_FILE) +extern unsigned long pte_file(pte_t); #define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pgoff_to_pte(off) (__pte(((off) << PAGE_SHIFT) | _PAGE_FILE)) +extern pte_t pgoff_to_pte(unsigned long); #define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL) extern unsigned long prom_virt_to_phys(unsigned long, int *); -static __inline__ unsigned long -sun4u_get_pte (unsigned long addr) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - if (addr >= PAGE_OFFSET) - return addr & _PAGE_PADDR; - if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS)) - return prom_virt_to_phys(addr, NULL); - pgdp = pgd_offset_k(addr); - pudp = pud_offset(pgdp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - return pte_val(*ptep) & _PAGE_PADDR; -} +extern unsigned long sun4u_get_pte(unsigned long); -static __inline__ unsigned long -__get_phys (unsigned long addr) +static inline unsigned long __get_phys(unsigned long addr) { - return sun4u_get_pte (addr); + return sun4u_get_pte(addr); } -static __inline__ int -__get_iospace (unsigned long addr) +static inline int __get_iospace(unsigned long addr) { - return ((sun4u_get_pte (addr) & 0xf0000000) >> 28); + return ((sun4u_get_pte(addr) & 0xf0000000) >> 28); } extern unsigned long *sparc64_valid_addr_bitmap; @@ -409,11 +742,6 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); -/* Clear virtual and physical cachability, set side-effect bit. */ -#define pgprot_noncached(prot) \ - (__pgprot((pgprot_val(prot) & ~(_PAGE_CP | _PAGE_CV)) | \ - _PAGE_E)) - /* * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in * its high 4 bits. These macros/functions put it there or get it from there. @@ -424,8 +752,11 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, #include <asm-generic/pgtable.h> -/* We provide our own get_unmapped_area to cope with VA holes for userland */ +/* We provide our own get_unmapped_area to cope with VA holes and + * SHM area cache aliasing for userland. + */ #define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN /* We provide a special get_unmapped_area for framebuffer mmaps to try and use * the largest alignment possible such that larget PTEs can be used. @@ -435,12 +766,9 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, unsigned long); #define HAVE_ARCH_FB_UNMAPPED_AREA -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -extern void check_pgt_cache(void); +extern void pgtable_cache_init(void); +extern void sun4v_register_fault_status(void); +extern void sun4v_ktsb_register(void); #endif /* !(__ASSEMBLY__) */ diff --git a/include/asm-sparc64/pil.h b/include/asm-sparc64/pil.h index 8f87750..79f827e 100644 --- a/include/asm-sparc64/pil.h +++ b/include/asm-sparc64/pil.h @@ -16,11 +16,13 @@ #define PIL_SMP_CALL_FUNC 1 #define PIL_SMP_RECEIVE_SIGNAL 2 #define PIL_SMP_CAPTURE 3 +#define PIL_SMP_CTX_NEW_VERSION 4 #ifndef __ASSEMBLY__ #define PIL_RESERVED(PIL) ((PIL) == PIL_SMP_CALL_FUNC || \ (PIL) == PIL_SMP_RECEIVE_SIGNAL || \ - (PIL) == PIL_SMP_CAPTURE) + (PIL) == PIL_SMP_CAPTURE || \ + (PIL) == PIL_SMP_CTX_NEW_VERSION) #endif #endif /* !(_SPARC64_PIL_H) */ diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h index cd8d9b4..c6896b8 100644 --- a/include/asm-sparc64/processor.h +++ b/include/asm-sparc64/processor.h @@ -28,6 +28,8 @@ * User lives in his very own context, and cannot reference us. Note * that TASK_SIZE is a misnomer, it really gives maximum user virtual * address that the kernel will allocate out. + * + * XXX No longer using virtual page tables, kill this upper limit... */ #define VA_BITS 44 #ifndef __ASSEMBLY__ @@ -37,18 +39,6 @@ #endif #define TASK_SIZE ((unsigned long)-VPTE_SIZE) -/* - * The vpte base must be able to hold the entire vpte, half - * of which lives above, and half below, the base. And it - * is placed as close to the highest address range as possible. - */ -#define VPTE_BASE_SPITFIRE (-(VPTE_SIZE/2)) -#if 1 -#define VPTE_BASE_CHEETAH VPTE_BASE_SPITFIRE -#else -#define VPTE_BASE_CHEETAH 0xffe0000000000000 -#endif - #ifndef __ASSEMBLY__ typedef struct { @@ -101,7 +91,8 @@ extern unsigned long thread_saved_pc(struct task_struct *); /* Do necessary setup to start up a newly executed thread. */ #define start_thread(regs, pc, sp) \ do { \ - regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (ASI_PNF << 24); \ + unsigned long __asi = ASI_PNF; \ + regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (__asi << 24UL); \ regs->tpc = ((pc & (~3)) - 4); \ regs->tnpc = regs->tpc + 4; \ regs->y = 0; \ @@ -138,10 +129,10 @@ do { \ #define start_thread32(regs, pc, sp) \ do { \ + unsigned long __asi = ASI_PNF; \ pc &= 0x00000000ffffffffUL; \ sp &= 0x00000000ffffffffUL; \ -\ - regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM); \ + regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM) | (__asi << 24UL); \ regs->tpc = ((pc & (~3)) - 4); \ regs->tnpc = regs->tpc + 4; \ regs->y = 0; \ @@ -226,6 +217,8 @@ static inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) +#define HAVE_ARCH_PICK_MMAP_LAYOUT + #endif /* !(__ASSEMBLY__) */ #endif /* !(__ASM_SPARC64_PROCESSOR_H) */ diff --git a/include/asm-sparc64/pstate.h b/include/asm-sparc64/pstate.h index 29fb74a..49a7924 100644 --- a/include/asm-sparc64/pstate.h +++ b/include/asm-sparc64/pstate.h @@ -28,11 +28,12 @@ /* The V9 TSTATE Register (with SpitFire and Linux extensions). * - * --------------------------------------------------------------- - * | Resv | CCR | ASI | %pil | PSTATE | Resv | CWP | - * --------------------------------------------------------------- - * 63 40 39 32 31 24 23 20 19 8 7 5 4 0 + * --------------------------------------------------------------------- + * | Resv | GL | CCR | ASI | %pil | PSTATE | Resv | CWP | + * --------------------------------------------------------------------- + * 63 43 42 40 39 32 31 24 23 20 19 8 7 5 4 0 */ +#define TSTATE_GL _AC(0x0000070000000000,UL) /* Global reg level */ #define TSTATE_CCR _AC(0x000000ff00000000,UL) /* Condition Codes. */ #define TSTATE_XCC _AC(0x000000f000000000,UL) /* Condition Codes. */ #define TSTATE_XNEG _AC(0x0000008000000000,UL) /* %xcc Negative. */ diff --git a/include/asm-sparc64/scratchpad.h b/include/asm-sparc64/scratchpad.h new file mode 100644 index 0000000..5e8b01f --- /dev/null +++ b/include/asm-sparc64/scratchpad.h @@ -0,0 +1,14 @@ +#ifndef _SPARC64_SCRATCHPAD_H +#define _SPARC64_SCRATCHPAD_H + +/* Sun4v scratchpad registers, accessed via ASI_SCRATCHPAD. */ + +#define SCRATCHPAD_MMU_MISS 0x00 /* Shared with OBP - set by OBP */ +#define SCRATCHPAD_CPUID 0x08 /* Shared with OBP - set by hypervisor */ +#define SCRATCHPAD_UTSBREG1 0x10 +#define SCRATCHPAD_UTSBREG2 0x18 + /* 0x20 and 0x28, hypervisor only... */ +#define SCRATCHPAD_UNUSED1 0x30 +#define SCRATCHPAD_UNUSED2 0x38 /* Reserved for OBP */ + +#endif /* !(_SPARC64_SCRATCHPAD_H) */ diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index 473edb2..89d86ec 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -33,37 +33,13 @@ extern cpumask_t phys_cpu_present_map; #define cpu_possible_map phys_cpu_present_map +extern cpumask_t cpu_sibling_map[NR_CPUS]; + /* * General functions that each host system must provide. */ -static __inline__ int hard_smp_processor_id(void) -{ - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - unsigned long cfg, ver; - __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (cfg) - : "i" (ASI_JBUS_CONFIG)); - return ((cfg >> 17) & 0x1f); - } else { - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (cfg) - : "i" (ASI_SAFARI_CONFIG)); - return ((cfg >> 17) & 0x3ff); - } - } else if (this_is_starfire != 0) { - return starfire_hard_smp_processor_id(); - } else { - unsigned long upaconfig; - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (upaconfig) - : "i" (ASI_UPA_CONFIG)); - return ((upaconfig >> 17) & 0x1f); - } -} - +extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) extern void smp_setup_cpu_possible_map(void); diff --git a/include/asm-sparc64/sparsemem.h b/include/asm-sparc64/sparsemem.h new file mode 100644 index 0000000..ed5c9d8 --- /dev/null +++ b/include/asm-sparc64/sparsemem.h @@ -0,0 +1,12 @@ +#ifndef _SPARC64_SPARSEMEM_H +#define _SPARC64_SPARSEMEM_H + +#ifdef __KERNEL__ + +#define SECTION_SIZE_BITS 26 +#define MAX_PHYSADDR_BITS 42 +#define MAX_PHYSMEM_BITS 42 + +#endif /* !(__KERNEL__) */ + +#endif /* !(_SPARC64_SPARSEMEM_H) */ diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h index 962638c..23ad8a7 100644 --- a/include/asm-sparc64/spitfire.h +++ b/include/asm-sparc64/spitfire.h @@ -44,6 +44,7 @@ enum ultra_tlb_layout { spitfire = 0, cheetah = 1, cheetah_plus = 2, + hypervisor = 3, }; extern enum ultra_tlb_layout tlb_type; diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index af254e5..a18ec87 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -209,9 +209,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\ + trap_block[current_thread_info()->cpu].thread = \ + task_thread_info(next); \ __asm__ __volatile__( \ "mov %%g4, %%g7\n\t" \ - "wrpr %%g0, 0x95, %%pstate\n\t" \ "stx %%i6, [%%sp + 2047 + 0x70]\n\t" \ "stx %%i7, [%%sp + 2047 + 0x78]\n\t" \ "rdpr %%wstate, %%o5\n\t" \ @@ -225,14 +226,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ "ldx [%%g6 + %3], %%o6\n\t" \ "ldub [%%g6 + %2], %%o5\n\t" \ "ldub [%%g6 + %4], %%o7\n\t" \ - "mov %%g6, %%l2\n\t" \ "wrpr %%o5, 0x0, %%wstate\n\t" \ "ldx [%%sp + 2047 + 0x70], %%i6\n\t" \ "ldx [%%sp + 2047 + 0x78], %%i7\n\t" \ - "wrpr %%g0, 0x94, %%pstate\n\t" \ - "mov %%l2, %%g6\n\t" \ "ldx [%%g6 + %6], %%g4\n\t" \ - "wrpr %%g0, 0x96, %%pstate\n\t" \ "brz,pt %%o7, 1f\n\t" \ " mov %%g7, %0\n\t" \ "b,a ret_from_syscall\n\t" \ diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index ac9d068..2ebf7f2 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h @@ -64,8 +64,6 @@ struct thread_info { __u64 kernel_cntd0, kernel_cntd1; __u64 pcr_reg; - __u64 cee_stuff; - struct restart_block restart_block; struct pt_regs *kern_una_regs; @@ -104,10 +102,9 @@ struct thread_info { #define TI_KERN_CNTD0 0x00000480 #define TI_KERN_CNTD1 0x00000488 #define TI_PCR 0x00000490 -#define TI_CEE_STUFF 0x00000498 -#define TI_RESTART_BLOCK 0x000004a0 -#define TI_KUNA_REGS 0x000004c8 -#define TI_KUNA_INSN 0x000004d0 +#define TI_RESTART_BLOCK 0x00000498 +#define TI_KUNA_REGS 0x000004c0 +#define TI_KUNA_INSN 0x000004c8 #define TI_FPREGS 0x00000500 /* We embed this in the uppermost byte of thread_info->flags */ diff --git a/include/asm-sparc64/timex.h b/include/asm-sparc64/timex.h index 9e8d417..2a5e4eb 100644 --- a/include/asm-sparc64/timex.h +++ b/include/asm-sparc64/timex.h @@ -14,4 +14,10 @@ typedef unsigned long cycles_t; #define get_cycles() tick_ops->get_tick() +#define ARCH_HAS_READ_CURRENT_TIMER 1 +#define read_current_timer(timer_val_p) \ +({ *timer_val_p = tick_ops->get_tick(); \ + 0; \ +}) + #endif diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h index 3ef9909..9ad5d9c 100644 --- a/include/asm-sparc64/tlbflush.h +++ b/include/asm-sparc64/tlbflush.h @@ -5,6 +5,11 @@ #include <linux/mm.h> #include <asm/mmu_context.h> +/* TSB flush operations. */ +struct mmu_gather; +extern void flush_tsb_kernel_range(unsigned long start, unsigned long end); +extern void flush_tsb_user(struct mmu_gather *mp); + /* TLB flush operations. */ extern void flush_tlb_pending(void); @@ -14,28 +19,36 @@ extern void flush_tlb_pending(void); #define flush_tlb_page(vma,addr) flush_tlb_pending() #define flush_tlb_mm(mm) flush_tlb_pending() +/* Local cpu only. */ extern void __flush_tlb_all(void); + extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r); extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP -#define flush_tlb_all() __flush_tlb_all() #define flush_tlb_kernel_range(start,end) \ - __flush_tlb_kernel_range(start,end) +do { flush_tsb_kernel_range(start,end); \ + __flush_tlb_kernel_range(start,end); \ +} while (0) #else /* CONFIG_SMP */ -extern void smp_flush_tlb_all(void); extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); -#define flush_tlb_all() smp_flush_tlb_all() #define flush_tlb_kernel_range(start, end) \ - smp_flush_tlb_kernel_range(start, end) +do { flush_tsb_kernel_range(start,end); \ + smp_flush_tlb_kernel_range(start, end); \ +} while (0) #endif /* ! CONFIG_SMP */ -extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long); +static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + /* We don't use virtual page tables for TLB miss processing + * any more. Nowadays we use the TSB. + */ +} #endif /* _SPARC64_TLBFLUSH_H */ diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h new file mode 100644 index 0000000..e82612c --- /dev/null +++ b/include/asm-sparc64/tsb.h @@ -0,0 +1,281 @@ +#ifndef _SPARC64_TSB_H +#define _SPARC64_TSB_H + +/* The sparc64 TSB is similar to the powerpc hashtables. It's a + * power-of-2 sized table of TAG/PTE pairs. The cpu precomputes + * pointers into this table for 8K and 64K page sizes, and also a + * comparison TAG based upon the virtual address and context which + * faults. + * + * TLB miss trap handler software does the actual lookup via something + * of the form: + * + * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1 + * ldxa [%g0] ASI_{D,I}MMU, %g6 + * sllx %g6, 22, %g6 + * srlx %g6, 22, %g6 + * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 + * cmp %g4, %g6 + * bne,pn %xcc, tsb_miss_{d,i}tlb + * mov FAULT_CODE_{D,I}TLB, %g3 + * stxa %g5, [%g0] ASI_{D,I}TLB_DATA_IN + * retry + * + * + * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte + * PTE. The TAG is of the same layout as the TLB TAG TARGET mmu + * register which is: + * + * ------------------------------------------------- + * | - | CONTEXT | - | VADDR bits 63:22 | + * ------------------------------------------------- + * 63 61 60 48 47 42 41 0 + * + * But actually, since we use per-mm TSB's, we zero out the CONTEXT + * field. + * + * Like the powerpc hashtables we need to use locking in order to + * synchronize while we update the entries. PTE updates need locking + * as well. + * + * We need to carefully choose a lock bits for the TSB entry. We + * choose to use bit 47 in the tag. Also, since we never map anything + * at page zero in context zero, we use zero as an invalid tag entry. + * When the lock bit is set, this forces a tag comparison failure. + */ + +#define TSB_TAG_LOCK_BIT 47 +#define TSB_TAG_LOCK_HIGH (1 << (TSB_TAG_LOCK_BIT - 32)) + +#define TSB_TAG_INVALID_BIT 46 +#define TSB_TAG_INVALID_HIGH (1 << (TSB_TAG_INVALID_BIT - 32)) + +#define TSB_MEMBAR membar #StoreStore + +/* Some cpus support physical address quad loads. We want to use + * those if possible so we don't need to hard-lock the TSB mapping + * into the TLB. We encode some instruction patching in order to + * support this. + * + * The kernel TSB is locked into the TLB by virtue of being in the + * kernel image, so we don't play these games for swapper_tsb access. + */ +#ifndef __ASSEMBLY__ +struct tsb_ldquad_phys_patch_entry { + unsigned int addr; + unsigned int sun4u_insn; + unsigned int sun4v_insn; +}; +extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch, + __tsb_ldquad_phys_patch_end; + +struct tsb_phys_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; +#endif +#define TSB_LOAD_QUAD(TSB, REG) \ +661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \ + .section .tsb_ldquad_phys_patch, "ax"; \ + .word 661b; \ + ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \ + ldda [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \ + .previous + +#define TSB_LOAD_TAG_HIGH(TSB, REG) \ +661: lduwa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + lduwa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_LOAD_TAG(TSB, REG) \ +661: ldxa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + ldxa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \ +661: casa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_CAS_TAG(TSB, REG1, REG2) \ +661: casxa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_STORE(ADDR, VAL) \ +661: stxa VAL, [ADDR] ASI_N; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + stxa VAL, [ADDR] ASI_PHYS_USE_EC; \ + .previous + +#define TSB_LOCK_TAG(TSB, REG1, REG2) \ +99: TSB_LOAD_TAG_HIGH(TSB, REG1); \ + sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ + andcc REG1, REG2, %g0; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \ + cmp REG1, REG2; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_MEMBAR + +#define TSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + TSB_STORE(TSB, TTE); \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + TSB_STORE(TSB, TAG); + +#define KTSB_LOAD_QUAD(TSB, REG) \ + ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; + +#define KTSB_STORE(ADDR, VAL) \ + stxa VAL, [ADDR] ASI_N; + +#define KTSB_LOCK_TAG(TSB, REG1, REG2) \ +99: lduwa [TSB] ASI_N, REG1; \ + sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ + andcc REG1, REG2, %g0; \ + bne,pn %icc, 99b; \ + nop; \ + casa [TSB] ASI_N, REG1, REG2;\ + cmp REG1, REG2; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_MEMBAR + +#define KTSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + stxa TTE, [TSB] ASI_N; \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + stxa TAG, [TSB] ASI_N; + + /* Do a kernel page table walk. Leaves physical PTE pointer in + * REG1. Jumps to FAIL_LABEL on early page table walk termination. + * VADDR will not be clobbered, but REG2 will. + */ +#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ + sethi %hi(swapper_pg_dir), REG1; \ + or REG1, %lo(swapper_pg_dir), REG1; \ + sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x3, REG2; \ + lduw [REG1 + REG2], REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x3, REG2; \ + lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x7, REG2; \ + add REG1, REG2, REG1; + + /* Do a user page table walk in MMU globals. Leaves physical PTE + * pointer in REG1. Jumps to FAIL_LABEL on early page table walk + * termination. Physical base of page tables is in PHYS_PGD which + * will not be modified. + * + * VADDR will not be clobbered, but REG1 and REG2 will. + */ +#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \ + sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x3, REG2; \ + lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x3, REG2; \ + lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + sllx REG1, 11, REG1; \ + andn REG2, 0x7, REG2; \ + add REG1, REG2, REG1; + +/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0. + * If no entry is found, FAIL_LABEL will be branched to. On success + * the resulting PTE value will be left in REG1. VADDR is preserved + * by this routine. + */ +#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \ + sethi %hi(prom_trans), REG1; \ + or REG1, %lo(prom_trans), REG1; \ +97: ldx [REG1 + 0x00], REG2; \ + brz,pn REG2, FAIL_LABEL; \ + nop; \ + ldx [REG1 + 0x08], REG3; \ + add REG2, REG3, REG3; \ + cmp REG2, VADDR; \ + bgu,pt %xcc, 98f; \ + cmp VADDR, REG3; \ + bgeu,pt %xcc, 98f; \ + ldx [REG1 + 0x10], REG3; \ + sub VADDR, REG2, REG2; \ + ba,pt %xcc, 99f; \ + add REG3, REG2, REG1; \ +98: ba,pt %xcc, 97b; \ + add REG1, (3 * 8), REG1; \ +99: + + /* We use a 32K TSB for the whole kernel, this allows to + * handle about 16MB of modules and vmalloc mappings without + * incurring many hash conflicts. + */ +#define KERNEL_TSB_SIZE_BYTES (32 * 1024) +#define KERNEL_TSB_NENTRIES \ + (KERNEL_TSB_SIZE_BYTES / 16) +#define KERNEL_TSB4M_NENTRIES 4096 + + /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL + * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries + * and the found TTE will be left in REG1. REG3 and REG4 must + * be an even/odd pair of registers. + * + * VADDR and TAG will be preserved and not clobbered by this macro. + */ +#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ + sethi %hi(swapper_tsb), REG1; \ + or REG1, %lo(swapper_tsb), REG1; \ + srlx VADDR, PAGE_SHIFT, REG2; \ + and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ + KTSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; + + /* This version uses a trick, the TAG is already (VADDR >> 22) so + * we can make use of that for the index computation. + */ +#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ + sethi %hi(swapper_4m_tsb), REG1; \ + or REG1, %lo(swapper_4m_tsb), REG1; \ + and TAG, (KERNEL_TSB_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ + KTSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; + +#endif /* !(_SPARC64_TSB_H) */ diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 2784f80..2d5e3c4 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h @@ -93,7 +93,7 @@ #define SYSCALL_TRAP(routine, systbl) \ sethi %hi(109f), %g7; \ - ba,pt %xcc, scetrap; \ + ba,pt %xcc, etrap; \ 109: or %g7, %lo(109b), %g7; \ sethi %hi(systbl), %l7; \ ba,pt %xcc, routine; \ @@ -109,14 +109,14 @@ nop;nop;nop; #define TRAP_UTRAP(handler,lvl) \ - ldx [%g6 + TI_UTRAPS], %g1; \ - sethi %hi(109f), %g7; \ - brz,pn %g1, utrap; \ - or %g7, %lo(109f), %g7; \ - ba,pt %xcc, utrap; \ -109: ldx [%g1 + handler*8], %g1; \ - ba,pt %xcc, utrap_ill; \ - mov lvl, %o1; + mov handler, %g3; \ + ba,pt %xcc, utrap_trap; \ + mov lvl, %g4; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; #ifdef CONFIG_SUNOS_EMUL #define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table) @@ -136,8 +136,6 @@ #else #define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall) #endif -/* FIXME: Write these actually */ -#define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall) #define BREAKPOINT_TRAP TRAP(breakpoint_trap) #define TRAP_IRQ(routine, level) \ @@ -182,6 +180,26 @@ #define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl) #endif +#define SUN4V_ITSB_MISS \ + ldxa [%g0] ASI_SCRATCHPAD, %g2; \ + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4; \ + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5; \ + srlx %g4, 22, %g6; \ + ba,pt %xcc, sun4v_itsb_miss; \ + nop; \ + nop; \ + nop; + +#define SUN4V_DTSB_MISS \ + ldxa [%g0] ASI_SCRATCHPAD, %g2; \ + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4; \ + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5; \ + srlx %g4, 22, %g6; \ + ba,pt %xcc, sun4v_dtsb_miss; \ + nop; \ + nop; \ + nop; + /* Before touching these macros, you owe it to yourself to go and * see how arch/sparc64/kernel/winfixup.S works... -DaveM * @@ -221,6 +239,31 @@ saved; retry; nop; nop; nop; nop; nop; nop; \ nop; nop; nop; nop; nop; nop; nop; nop; +#define SPILL_0_NORMAL_ETRAP \ +etrap_kernel_spill: \ + stx %l0, [%sp + STACK_BIAS + 0x00]; \ + stx %l1, [%sp + STACK_BIAS + 0x08]; \ + stx %l2, [%sp + STACK_BIAS + 0x10]; \ + stx %l3, [%sp + STACK_BIAS + 0x18]; \ + stx %l4, [%sp + STACK_BIAS + 0x20]; \ + stx %l5, [%sp + STACK_BIAS + 0x28]; \ + stx %l6, [%sp + STACK_BIAS + 0x30]; \ + stx %l7, [%sp + STACK_BIAS + 0x38]; \ + stx %i0, [%sp + STACK_BIAS + 0x40]; \ + stx %i1, [%sp + STACK_BIAS + 0x48]; \ + stx %i2, [%sp + STACK_BIAS + 0x50]; \ + stx %i3, [%sp + STACK_BIAS + 0x58]; \ + stx %i4, [%sp + STACK_BIAS + 0x60]; \ + stx %i5, [%sp + STACK_BIAS + 0x68]; \ + stx %i6, [%sp + STACK_BIAS + 0x70]; \ + stx %i7, [%sp + STACK_BIAS + 0x78]; \ + saved; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; + /* Normal 64bit spill */ #define SPILL_1_GENERIC(ASI) \ add %sp, STACK_BIAS + 0x00, %g1; \ @@ -254,6 +297,67 @@ b,a,pt %xcc, spill_fixup_mna; \ b,a,pt %xcc, spill_fixup; +#define SPILL_1_GENERIC_ETRAP \ +etrap_user_spill_64bit: \ + stxa %l0, [%sp + STACK_BIAS + 0x00] %asi; \ + stxa %l1, [%sp + STACK_BIAS + 0x08] %asi; \ + stxa %l2, [%sp + STACK_BIAS + 0x10] %asi; \ + stxa %l3, [%sp + STACK_BIAS + 0x18] %asi; \ + stxa %l4, [%sp + STACK_BIAS + 0x20] %asi; \ + stxa %l5, [%sp + STACK_BIAS + 0x28] %asi; \ + stxa %l6, [%sp + STACK_BIAS + 0x30] %asi; \ + stxa %l7, [%sp + STACK_BIAS + 0x38] %asi; \ + stxa %i0, [%sp + STACK_BIAS + 0x40] %asi; \ + stxa %i1, [%sp + STACK_BIAS + 0x48] %asi; \ + stxa %i2, [%sp + STACK_BIAS + 0x50] %asi; \ + stxa %i3, [%sp + STACK_BIAS + 0x58] %asi; \ + stxa %i4, [%sp + STACK_BIAS + 0x60] %asi; \ + stxa %i5, [%sp + STACK_BIAS + 0x68] %asi; \ + stxa %i6, [%sp + STACK_BIAS + 0x70] %asi; \ + stxa %i7, [%sp + STACK_BIAS + 0x78] %asi; \ + saved; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; \ + ba,a,pt %xcc, etrap_spill_fixup_64bit; \ + ba,a,pt %xcc, etrap_spill_fixup_64bit; \ + ba,a,pt %xcc, etrap_spill_fixup_64bit; + +#define SPILL_1_GENERIC_ETRAP_FIXUP \ +etrap_spill_fixup_64bit: \ + ldub [%g6 + TI_WSAVED], %g1; \ + sll %g1, 3, %g3; \ + add %g6, %g3, %g3; \ + stx %sp, [%g3 + TI_RWIN_SPTRS]; \ + sll %g1, 7, %g3; \ + add %g6, %g3, %g3; \ + stx %l0, [%g3 + TI_REG_WINDOW + 0x00]; \ + stx %l1, [%g3 + TI_REG_WINDOW + 0x08]; \ + stx %l2, [%g3 + TI_REG_WINDOW + 0x10]; \ + stx %l3, [%g3 + TI_REG_WINDOW + 0x18]; \ + stx %l4, [%g3 + TI_REG_WINDOW + 0x20]; \ + stx %l5, [%g3 + TI_REG_WINDOW + 0x28]; \ + stx %l6, [%g3 + TI_REG_WINDOW + 0x30]; \ + stx %l7, [%g3 + TI_REG_WINDOW + 0x38]; \ + stx %i0, [%g3 + TI_REG_WINDOW + 0x40]; \ + stx %i1, [%g3 + TI_REG_WINDOW + 0x48]; \ + stx %i2, [%g3 + TI_REG_WINDOW + 0x50]; \ + stx %i3, [%g3 + TI_REG_WINDOW + 0x58]; \ + stx %i4, [%g3 + TI_REG_WINDOW + 0x60]; \ + stx %i5, [%g3 + TI_REG_WINDOW + 0x68]; \ + stx %i6, [%g3 + TI_REG_WINDOW + 0x70]; \ + stx %i7, [%g3 + TI_REG_WINDOW + 0x78]; \ + add %g1, 1, %g1; \ + stb %g1, [%g6 + TI_WSAVED]; \ + saved; \ + rdpr %cwp, %g1; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop + /* Normal 32bit spill */ #define SPILL_2_GENERIC(ASI) \ srl %sp, 0, %sp; \ @@ -287,6 +391,68 @@ b,a,pt %xcc, spill_fixup_mna; \ b,a,pt %xcc, spill_fixup; +#define SPILL_2_GENERIC_ETRAP \ +etrap_user_spill_32bit: \ + srl %sp, 0, %sp; \ + stwa %l0, [%sp + 0x00] %asi; \ + stwa %l1, [%sp + 0x04] %asi; \ + stwa %l2, [%sp + 0x08] %asi; \ + stwa %l3, [%sp + 0x0c] %asi; \ + stwa %l4, [%sp + 0x10] %asi; \ + stwa %l5, [%sp + 0x14] %asi; \ + stwa %l6, [%sp + 0x18] %asi; \ + stwa %l7, [%sp + 0x1c] %asi; \ + stwa %i0, [%sp + 0x20] %asi; \ + stwa %i1, [%sp + 0x24] %asi; \ + stwa %i2, [%sp + 0x28] %asi; \ + stwa %i3, [%sp + 0x2c] %asi; \ + stwa %i4, [%sp + 0x30] %asi; \ + stwa %i5, [%sp + 0x34] %asi; \ + stwa %i6, [%sp + 0x38] %asi; \ + stwa %i7, [%sp + 0x3c] %asi; \ + saved; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; \ + nop; nop; nop; nop; \ + ba,a,pt %xcc, etrap_spill_fixup_32bit; \ + ba,a,pt %xcc, etrap_spill_fixup_32bit; \ + ba,a,pt %xcc, etrap_spill_fixup_32bit; + +#define SPILL_2_GENERIC_ETRAP_FIXUP \ +etrap_spill_fixup_32bit: \ + ldub [%g6 + TI_WSAVED], %g1; \ + sll %g1, 3, %g3; \ + add %g6, %g3, %g3; \ + stx %sp, [%g3 + TI_RWIN_SPTRS]; \ + sll %g1, 7, %g3; \ + add %g6, %g3, %g3; \ + stw %l0, [%g3 + TI_REG_WINDOW + 0x00]; \ + stw %l1, [%g3 + TI_REG_WINDOW + 0x04]; \ + stw %l2, [%g3 + TI_REG_WINDOW + 0x08]; \ + stw %l3, [%g3 + TI_REG_WINDOW + 0x0c]; \ + stw %l4, [%g3 + TI_REG_WINDOW + 0x10]; \ + stw %l5, [%g3 + TI_REG_WINDOW + 0x14]; \ + stw %l6, [%g3 + TI_REG_WINDOW + 0x18]; \ + stw %l7, [%g3 + TI_REG_WINDOW + 0x1c]; \ + stw %i0, [%g3 + TI_REG_WINDOW + 0x20]; \ + stw %i1, [%g3 + TI_REG_WINDOW + 0x24]; \ + stw %i2, [%g3 + TI_REG_WINDOW + 0x28]; \ + stw %i3, [%g3 + TI_REG_WINDOW + 0x2c]; \ + stw %i4, [%g3 + TI_REG_WINDOW + 0x30]; \ + stw %i5, [%g3 + TI_REG_WINDOW + 0x34]; \ + stw %i6, [%g3 + TI_REG_WINDOW + 0x38]; \ + stw %i7, [%g3 + TI_REG_WINDOW + 0x3c]; \ + add %g1, 1, %g1; \ + stb %g1, [%g6 + TI_WSAVED]; \ + saved; \ + rdpr %cwp, %g1; \ + sub %g1, 2, %g1; \ + ba,pt %xcc, etrap_save; \ + wrpr %g1, %cwp; \ + nop; nop; nop + #define SPILL_1_NORMAL SPILL_1_GENERIC(ASI_AIUP) #define SPILL_2_NORMAL SPILL_2_GENERIC(ASI_AIUP) #define SPILL_3_NORMAL SPILL_0_NORMAL @@ -325,6 +491,35 @@ restored; retry; nop; nop; nop; nop; nop; nop; \ nop; nop; nop; nop; nop; nop; nop; nop; +#define FILL_0_NORMAL_RTRAP \ +kern_rtt_fill: \ + rdpr %cwp, %g1; \ + sub %g1, 1, %g1; \ + wrpr %g1, %cwp; \ + ldx [%sp + STACK_BIAS + 0x00], %l0; \ + ldx [%sp + STACK_BIAS + 0x08], %l1; \ + ldx [%sp + STACK_BIAS + 0x10], %l2; \ + ldx [%sp + STACK_BIAS + 0x18], %l3; \ + ldx [%sp + STACK_BIAS + 0x20], %l4; \ + ldx [%sp + STACK_BIAS + 0x28], %l5; \ + ldx [%sp + STACK_BIAS + 0x30], %l6; \ + ldx [%sp + STACK_BIAS + 0x38], %l7; \ + ldx [%sp + STACK_BIAS + 0x40], %i0; \ + ldx [%sp + STACK_BIAS + 0x48], %i1; \ + ldx [%sp + STACK_BIAS + 0x50], %i2; \ + ldx [%sp + STACK_BIAS + 0x58], %i3; \ + ldx [%sp + STACK_BIAS + 0x60], %i4; \ + ldx [%sp + STACK_BIAS + 0x68], %i5; \ + ldx [%sp + STACK_BIAS + 0x70], %i6; \ + ldx [%sp + STACK_BIAS + 0x78], %i7; \ + restored; \ + add %g1, 1, %g1; \ + ba,pt %xcc, kern_rtt_restore; \ + wrpr %g1, %cwp; \ + nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; + + /* Normal 64bit fill */ #define FILL_1_GENERIC(ASI) \ add %sp, STACK_BIAS + 0x00, %g1; \ @@ -356,6 +551,33 @@ b,a,pt %xcc, fill_fixup_mna; \ b,a,pt %xcc, fill_fixup; +#define FILL_1_GENERIC_RTRAP \ +user_rtt_fill_64bit: \ + ldxa [%sp + STACK_BIAS + 0x00] %asi, %l0; \ + ldxa [%sp + STACK_BIAS + 0x08] %asi, %l1; \ + ldxa [%sp + STACK_BIAS + 0x10] %asi, %l2; \ + ldxa [%sp + STACK_BIAS + 0x18] %asi, %l3; \ + ldxa [%sp + STACK_BIAS + 0x20] %asi, %l4; \ + ldxa [%sp + STACK_BIAS + 0x28] %asi, %l5; \ + ldxa [%sp + STACK_BIAS + 0x30] %asi, %l6; \ + ldxa [%sp + STACK_BIAS + 0x38] %asi, %l7; \ + ldxa [%sp + STACK_BIAS + 0x40] %asi, %i0; \ + ldxa [%sp + STACK_BIAS + 0x48] %asi, %i1; \ + ldxa [%sp + STACK_BIAS + 0x50] %asi, %i2; \ + ldxa [%sp + STACK_BIAS + 0x58] %asi, %i3; \ + ldxa [%sp + STACK_BIAS + 0x60] %asi, %i4; \ + ldxa [%sp + STACK_BIAS + 0x68] %asi, %i5; \ + ldxa [%sp + STACK_BIAS + 0x70] %asi, %i6; \ + ldxa [%sp + STACK_BIAS + 0x78] %asi, %i7; \ + ba,pt %xcc, user_rtt_pre_restore; \ + restored; \ + nop; nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; nop; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; + + /* Normal 32bit fill */ #define FILL_2_GENERIC(ASI) \ srl %sp, 0, %sp; \ @@ -387,6 +609,34 @@ b,a,pt %xcc, fill_fixup_mna; \ b,a,pt %xcc, fill_fixup; +#define FILL_2_GENERIC_RTRAP \ +user_rtt_fill_32bit: \ + srl %sp, 0, %sp; \ + lduwa [%sp + 0x00] %asi, %l0; \ + lduwa [%sp + 0x04] %asi, %l1; \ + lduwa [%sp + 0x08] %asi, %l2; \ + lduwa [%sp + 0x0c] %asi, %l3; \ + lduwa [%sp + 0x10] %asi, %l4; \ + lduwa [%sp + 0x14] %asi, %l5; \ + lduwa [%sp + 0x18] %asi, %l6; \ + lduwa [%sp + 0x1c] %asi, %l7; \ + lduwa [%sp + 0x20] %asi, %i0; \ + lduwa [%sp + 0x24] %asi, %i1; \ + lduwa [%sp + 0x28] %asi, %i2; \ + lduwa [%sp + 0x2c] %asi, %i3; \ + lduwa [%sp + 0x30] %asi, %i4; \ + lduwa [%sp + 0x34] %asi, %i5; \ + lduwa [%sp + 0x38] %asi, %i6; \ + lduwa [%sp + 0x3c] %asi, %i7; \ + ba,pt %xcc, user_rtt_pre_restore; \ + restored; \ + nop; nop; nop; nop; nop; \ + nop; nop; nop; nop; nop; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup; + + #define FILL_1_NORMAL FILL_1_GENERIC(ASI_AIUP) #define FILL_2_NORMAL FILL_2_GENERIC(ASI_AIUP) #define FILL_3_NORMAL FILL_0_NORMAL diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index c91d1e3..afe236b 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -114,16 +114,6 @@ case 8: __put_user_asm(data,x,addr,__pu_ret); break; \ default: __pu_ret = __put_user_bad(); break; \ } __pu_ret; }) -#define __put_user_nocheck_ret(data,addr,size,retval) ({ \ -register int __foo __asm__ ("l1"); \ -switch (size) { \ -case 1: __put_user_asm_ret(data,b,addr,retval,__foo); break; \ -case 2: __put_user_asm_ret(data,h,addr,retval,__foo); break; \ -case 4: __put_user_asm_ret(data,w,addr,retval,__foo); break; \ -case 8: __put_user_asm_ret(data,x,addr,retval,__foo); break; \ -default: if (__put_user_bad()) return retval; break; \ -} }) - #define __put_user_asm(x,size,addr,ret) \ __asm__ __volatile__( \ "/* Put user asm, inline. */\n" \ @@ -143,33 +133,6 @@ __asm__ __volatile__( \ : "=r" (ret) : "r" (x), "r" (__m(addr)), \ "i" (-EFAULT)) -#define __put_user_asm_ret(x,size,addr,ret,foo) \ -if (__builtin_constant_p(ret) && ret == -EFAULT) \ -__asm__ __volatile__( \ - "/* Put user asm ret, inline. */\n" \ -"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, __ret_efault\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "r" (__m(addr))); \ -else \ -__asm__ __volatile__( \ - "/* Put user asm ret, inline. */\n" \ -"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ -"3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %3, %%o0\n\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\n\t" \ - : "=r" (foo) : "r" (x), "r" (__m(addr)), \ - "i" (ret)) - extern int __put_user_bad(void); #define __get_user_nocheck(data,addr,size,type) ({ \ @@ -289,14 +252,7 @@ copy_in_user(void __user *to, void __user *from, unsigned long size) } #define __copy_in_user copy_in_user -extern unsigned long __must_check __bzero_noasi(void __user *, unsigned long); - -static inline unsigned long __must_check -__clear_user(void __user *addr, unsigned long size) -{ - - return __bzero_noasi(addr, size); -} +extern unsigned long __must_check __clear_user(void __user *, unsigned long); #define clear_user __clear_user diff --git a/include/asm-sparc64/vdev.h b/include/asm-sparc64/vdev.h new file mode 100644 index 0000000..996e6be --- /dev/null +++ b/include/asm-sparc64/vdev.h @@ -0,0 +1,16 @@ +/* vdev.h: SUN4V virtual device interfaces and defines. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + +#ifndef _SPARC64_VDEV_H +#define _SPARC64_VDEV_H + +#include <linux/types.h> + +extern u32 sun4v_vdev_devhandle; +extern int sun4v_vdev_root; + +extern unsigned int sun4v_vdev_device_interrupt(unsigned int); + +#endif /* !(_SPARC64_VDEV_H) */ diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h index 8b3a7e4..8ce3f18 100644 --- a/include/asm-sparc64/xor.h +++ b/include/asm-sparc64/xor.h @@ -2,9 +2,11 @@ * include/asm-sparc64/xor.h * * High speed xor_block operation for RAID4/5 utilizing the - * UltraSparc Visual Instruction Set. + * UltraSparc Visual Instruction Set and Niagara block-init + * twin-load instructions. * * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,8 +18,7 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <asm/pstate.h> -#include <asm/asi.h> +#include <asm/spitfire.h> extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, @@ -37,4 +38,29 @@ static struct xor_block_template xor_block_VIS = { .do_5 = xor_vis_5, }; -#define XOR_TRY_TEMPLATES xor_speed(&xor_block_VIS) +extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +static struct xor_block_template xor_block_niagara = { + .name = "Niagara", + .do_2 = xor_niagara_2, + .do_3 = xor_niagara_3, + .do_4 = xor_niagara_4, + .do_5 = xor_niagara_5, +}; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_VIS); \ + xor_speed(&xor_block_niagara); \ + } while (0) + +/* For VIS for everything except Niagara. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS) diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 7198f12..231ba09 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -206,7 +206,6 @@ struct ArcProto { extern struct ArcProto *arc_proto_map[256], *arc_proto_default, *arc_bcast_proto, *arc_raw_proto; -extern struct ArcProto arc_proto_null; /* @@ -334,17 +333,9 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #define arcnet_dump_skb(dev,skb,desc) ; #endif -#if (ARCNET_DEBUG_MAX & D_RX) || (ARCNET_DEBUG_MAX & D_TX) -void arcnet_dump_packet(struct net_device *dev, int bufnum, char *desc, - int take_arcnet_lock); -#else -#define arcnet_dump_packet(dev, bufnum, desc,take_arcnet_lock) ; -#endif - void arcnet_unregister_proto(struct ArcProto *proto); irqreturn_t arcnet_interrupt(int irq, void *dev_id, struct pt_regs *regs); struct net_device *alloc_arcdev(char *name); -void arcnet_rx(struct net_device *dev, int bufnum); #endif /* __KERNEL__ */ #endif /* _LINUX_ARCDEVICE_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 860e7a4..56bb6a4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,7 +58,7 @@ struct cfq_io_context { * circular list of cfq_io_contexts belonging to a process io context */ struct list_head list; - struct cfq_queue *cfqq; + struct cfq_queue *cfqq[2]; void *key; struct io_context *ioc; @@ -69,6 +69,8 @@ struct cfq_io_context { unsigned long ttime_samples; unsigned long ttime_mean; + struct list_head queue_list; + void (*dtor)(struct cfq_io_context *); void (*exit)(struct cfq_io_context *); }; @@ -404,8 +406,6 @@ struct request_queue struct blk_queue_tag *queue_tags; - atomic_t refcnt; - unsigned int nr_sorted; unsigned int in_flight; @@ -424,6 +424,8 @@ struct request_queue struct request pre_flush_rq, bar_rq, post_flush_rq; struct request *orig_bar_rq; unsigned int bi_size; + + struct mutex sysfs_lock; }; #define RQ_INACTIVE (-1) @@ -725,7 +727,7 @@ extern long nr_blockdev_pages(void); int blk_get_queue(request_queue_t *); request_queue_t *blk_alloc_queue(gfp_t); request_queue_t *blk_alloc_queue_node(gfp_t, int); -#define blk_put_queue(q) blk_cleanup_queue((q)) +extern void blk_put_queue(request_queue_t *); /* * tag stuff diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 18cf1f3..ad133fc 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -48,10 +48,17 @@ struct elevator_ops elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; + void (*trim)(struct io_context *); }; #define ELV_NAME_MAX (16) +struct elv_fs_entry { + struct attribute attr; + ssize_t (*show)(elevator_t *, char *); + ssize_t (*store)(elevator_t *, const char *, size_t); +}; + /* * identifies an elevator type, such as AS or deadline */ @@ -60,7 +67,7 @@ struct elevator_type struct list_head list; struct elevator_ops ops; struct elevator_type *elevator_type; - struct kobj_type *elevator_ktype; + struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; }; @@ -74,6 +81,7 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct elevator_type *elevator_type; + struct mutex sysfs_lock; }; /* diff --git a/include/linux/if.h b/include/linux/if.h index ce627d9..12c6f6d 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -52,6 +52,9 @@ /* Private (from user) interface flags (netdevice->priv_flags). */ #define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */ #define IFF_EBRIDGE 0x2 /* Ethernet bridging device. */ +#define IFF_SLAVE_INACTIVE 0x4 /* bonding slave not the curr. active */ +#define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ +#define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 7a92c1c..ab08f35 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -61,6 +61,7 @@ #define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ #define ETH_P_IPX 0x8137 /* IPX over DIX */ #define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ +#define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */ #define ETH_P_WCCP 0x883E /* Web-cache coordination protocol * defined in draft-wilson-wrec-wccp-v2-00.txt */ #define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */ diff --git a/include/linux/mv643xx.h b/include/linux/mv643xx.h index 0b08cd6..955d306 100644 --- a/include/linux/mv643xx.h +++ b/include/linux/mv643xx.h @@ -1214,6 +1214,7 @@ struct mv64xxx_i2c_pdata { #define MV643XX_ETH_FORCE_BP_MODE_NO_JAM 0 #define MV643XX_ETH_FORCE_BP_MODE_JAM_TX (1<<7) #define MV643XX_ETH_FORCE_BP_MODE_JAM_TX_ON_RX_ERR (1<<8) +#define MV643XX_ETH_SERIAL_PORT_CONTROL_RESERVED (1<<9) #define MV643XX_ETH_FORCE_LINK_FAIL 0 #define MV643XX_ETH_DO_NOT_FORCE_LINK_FAIL (1<<10) #define MV643XX_ETH_RETRANSMIT_16_ATTEMPTS 0 @@ -1243,6 +1244,8 @@ struct mv64xxx_i2c_pdata { #define MV643XX_ETH_SET_MII_SPEED_TO_10 0 #define MV643XX_ETH_SET_MII_SPEED_TO_100 (1<<24) +#define MV643XX_ETH_MAX_RX_PACKET_MASK (0x7<<17) + #define MV643XX_ETH_PORT_SERIAL_CONTROL_DEFAULT_VALUE \ MV643XX_ETH_DO_NOT_FORCE_LINK_PASS | \ MV643XX_ETH_ENABLE_AUTO_NEG_FOR_DUPLX | \ @@ -1285,23 +1288,15 @@ struct mv64xxx_i2c_pdata { #define MV643XX_ETH_NAME "mv643xx_eth" struct mv643xx_eth_platform_data { - /* - * Non-values for mac_addr, phy_addr, port_config, etc. - * override the default value. Setting the corresponding - * force_* field, causes the default value to be overridden - * even when zero. - */ - unsigned int force_phy_addr:1; - unsigned int force_port_config:1; - unsigned int force_port_config_extend:1; - unsigned int force_port_sdma_config:1; - unsigned int force_port_serial_control:1; - int phy_addr; char *mac_addr; /* pointer to mac address */ - u32 port_config; - u32 port_config_extend; - u32 port_sdma_config; - u32 port_serial_control; + u16 force_phy_addr; /* force override if phy_addr == 0 */ + u16 phy_addr; + + /* If speed is 0, then speed and duplex are autonegotiated. */ + int speed; /* 0, SPEED_10, SPEED_100, SPEED_1000 */ + int duplex; /* DUPLEX_HALF or DUPLEX_FULL */ + + /* non-zero values of the following fields override defaults */ u32 tx_queue_size; u32 rx_queue_size; u32 tx_sram_addr; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4041122..57abcea 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -127,6 +127,9 @@ /* Hilscher netx */ #define PORT_NETX 71 +/* SUN4V Hypervisor Console */ +#define PORT_SUNHV 72 + #ifdef __KERNEL__ #include <linux/config.h> diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 9a92aef..4725ff8 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -220,6 +220,7 @@ struct ieee80211_snap_hdr { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 +#define WLAN_AUTH_LEAP 2 #define WLAN_AUTH_CHALLENGE_LEN 128 @@ -299,6 +300,23 @@ enum ieee80211_reasoncode { WLAN_REASON_CIPHER_SUITE_REJECTED = 24, }; +/* Action categories - 802.11h */ +enum ieee80211_actioncategories { + WLAN_ACTION_SPECTRUM_MGMT = 0, + /* Reserved 1-127 */ + /* Error 128-255 */ +}; + +/* Action details - 802.11h */ +enum ieee80211_actiondetails { + WLAN_ACTION_CATEGORY_MEASURE_REQUEST = 0, + WLAN_ACTION_CATEGORY_MEASURE_REPORT = 1, + WLAN_ACTION_CATEGORY_TPC_REQUEST = 2, + WLAN_ACTION_CATEGORY_TPC_REPORT = 3, + WLAN_ACTION_CATEGORY_CHANNEL_SWITCH = 4, + /* 5 - 255 Reserved */ +}; + #define IEEE80211_STATMASK_SIGNAL (1<<0) #define IEEE80211_STATMASK_RSSI (1<<1) #define IEEE80211_STATMASK_NOISE (1<<2) @@ -377,6 +395,8 @@ struct ieee80211_rx_stats { u8 mask; u8 freq; u16 len; + u64 tsf; + u32 beacon_time; }; /* IEEE 802.11 requires that STA supports concurrent reception of at least @@ -608,6 +628,28 @@ struct ieee80211_auth { struct ieee80211_info_element info_element[0]; } __attribute__ ((packed)); +struct ieee80211_channel_switch { + u8 id; + u8 len; + u8 mode; + u8 channel; + u8 count; +} __attribute__ ((packed)); + +struct ieee80211_action { + struct ieee80211_hdr_3addr header; + u8 category; + u8 action; + union { + struct ieee80211_action_exchange { + u8 token; + struct ieee80211_info_element info_element[0]; + } exchange; + struct ieee80211_channel_switch channel_switch; + + } format; +} __attribute__ ((packed)); + struct ieee80211_disassoc { struct ieee80211_hdr_3addr header; __le16 reason; @@ -692,7 +734,15 @@ struct ieee80211_txb { /* QoS structure */ #define NETWORK_HAS_QOS_PARAMETERS (1<<3) #define NETWORK_HAS_QOS_INFORMATION (1<<4) -#define NETWORK_HAS_QOS_MASK (NETWORK_HAS_QOS_PARAMETERS | NETWORK_HAS_QOS_INFORMATION) +#define NETWORK_HAS_QOS_MASK (NETWORK_HAS_QOS_PARAMETERS | \ + NETWORK_HAS_QOS_INFORMATION) + +/* 802.11h */ +#define NETWORK_HAS_POWER_CONSTRAINT (1<<5) +#define NETWORK_HAS_CSA (1<<6) +#define NETWORK_HAS_QUIET (1<<7) +#define NETWORK_HAS_IBSS_DFS (1<<8) +#define NETWORK_HAS_TPC_REPORT (1<<9) #define QOS_QUEUE_NUM 4 #define QOS_OUI_LEN 3 @@ -748,6 +798,91 @@ struct ieee80211_tim_parameters { /*******************************************************/ +enum { /* ieee80211_basic_report.map */ + IEEE80211_BASIC_MAP_BSS = (1 << 0), + IEEE80211_BASIC_MAP_OFDM = (1 << 1), + IEEE80211_BASIC_MAP_UNIDENTIFIED = (1 << 2), + IEEE80211_BASIC_MAP_RADAR = (1 << 3), + IEEE80211_BASIC_MAP_UNMEASURED = (1 << 4), + /* Bits 5-7 are reserved */ + +}; +struct ieee80211_basic_report { + u8 channel; + __le64 start_time; + __le16 duration; + u8 map; +} __attribute__ ((packed)); + +enum { /* ieee80211_measurement_request.mode */ + /* Bit 0 is reserved */ + IEEE80211_MEASUREMENT_ENABLE = (1 << 1), + IEEE80211_MEASUREMENT_REQUEST = (1 << 2), + IEEE80211_MEASUREMENT_REPORT = (1 << 3), + /* Bits 4-7 are reserved */ +}; + +enum { + IEEE80211_REPORT_BASIC = 0, /* required */ + IEEE80211_REPORT_CCA = 1, /* optional */ + IEEE80211_REPORT_RPI = 2, /* optional */ + /* 3-255 reserved */ +}; + +struct ieee80211_measurement_params { + u8 channel; + __le64 start_time; + __le16 duration; +} __attribute__ ((packed)); + +struct ieee80211_measurement_request { + struct ieee80211_info_element ie; + u8 token; + u8 mode; + u8 type; + struct ieee80211_measurement_params params[0]; +} __attribute__ ((packed)); + +struct ieee80211_measurement_report { + struct ieee80211_info_element ie; + u8 token; + u8 mode; + u8 type; + union { + struct ieee80211_basic_report basic[0]; + } u; +} __attribute__ ((packed)); + +struct ieee80211_tpc_report { + u8 transmit_power; + u8 link_margin; +} __attribute__ ((packed)); + +struct ieee80211_channel_map { + u8 channel; + u8 map; +} __attribute__ ((packed)); + +struct ieee80211_ibss_dfs { + struct ieee80211_info_element ie; + u8 owner[ETH_ALEN]; + u8 recovery_interval; + struct ieee80211_channel_map channel_map[0]; +}; + +struct ieee80211_csa { + u8 mode; + u8 channel; + u8 count; +} __attribute__ ((packed)); + +struct ieee80211_quiet { + u8 count; + u8 period; + u8 duration; + u8 offset; +} __attribute__ ((packed)); + struct ieee80211_network { /* These entries are used to identify a unique network */ u8 bssid[ETH_ALEN]; @@ -767,7 +902,7 @@ struct ieee80211_network { u8 rates_ex_len; unsigned long last_scanned; u8 mode; - u8 flags; + u32 flags; u32 last_associate; u32 time_stamp[2]; u16 beacon_interval; @@ -779,6 +914,25 @@ struct ieee80211_network { u8 rsn_ie[MAX_WPA_IE_LEN]; size_t rsn_ie_len; struct ieee80211_tim_parameters tim; + + /* 802.11h info */ + + /* Power Constraint - mandatory if spctrm mgmt required */ + u8 power_constraint; + + /* TPC Report - mandatory if spctrm mgmt required */ + struct ieee80211_tpc_report tpc_report; + + /* IBSS DFS - mandatory if spctrm mgmt required and IBSS + * NOTE: This is variable length and so must be allocated dynamically */ + struct ieee80211_ibss_dfs *ibss_dfs; + + /* Channel Switch Announcement - optional if spctrm mgmt required */ + struct ieee80211_csa csa; + + /* Quiet - optional if spctrm mgmt required */ + struct ieee80211_quiet quiet; + struct list_head list; }; @@ -924,7 +1078,10 @@ struct ieee80211_device { int (*handle_auth) (struct net_device * dev, struct ieee80211_auth * auth); int (*handle_deauth) (struct net_device * dev, - struct ieee80211_auth * auth); + struct ieee80211_deauth * auth); + int (*handle_action) (struct net_device * dev, + struct ieee80211_action * action, + struct ieee80211_rx_stats * stats); int (*handle_disassoc) (struct net_device * dev, struct ieee80211_disassoc * assoc); int (*handle_beacon) (struct net_device * dev, @@ -1093,6 +1250,7 @@ extern int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, extern void ieee80211_rx_mgt(struct ieee80211_device *ieee, struct ieee80211_hdr_4addr *header, struct ieee80211_rx_stats *stats); +extern void ieee80211_network_reset(struct ieee80211_network *network); /* ieee80211_geo.c */ extern const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device @@ -1105,6 +1263,11 @@ extern int ieee80211_is_valid_channel(struct ieee80211_device *ieee, extern int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel); extern u8 ieee80211_freq_to_channel(struct ieee80211_device *ieee, u32 freq); +extern u8 ieee80211_get_channel_flags(struct ieee80211_device *ieee, + u8 channel); +extern const struct ieee80211_channel *ieee80211_get_channel(struct + ieee80211_device + *ieee, u8 channel); /* ieee80211_wx.c */ extern int ieee80211_wx_get_scan(struct ieee80211_device *ieee, @@ -1122,6 +1285,14 @@ extern int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, extern int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, struct iw_request_info *info, union iwreq_data *wrqu, char *extra); +extern int ieee80211_wx_set_auth(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra); +extern int ieee80211_wx_get_auth(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra); static inline void ieee80211_increment_scans(struct ieee80211_device *ieee) { diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h index cd82c3e..eb47641 100644 --- a/include/net/ieee80211_crypt.h +++ b/include/net/ieee80211_crypt.h @@ -47,7 +47,8 @@ struct ieee80211_crypto_ops { /* deinitialize crypto context and free allocated private data */ void (*deinit) (void *priv); - int (*build_iv) (struct sk_buff * skb, int hdr_len, void *priv); + int (*build_iv) (struct sk_buff * skb, int hdr_len, + u8 *key, int keylen, void *priv); /* encrypt/decrypt return < 0 on error or >= 0 on success. The return * value from decrypt_mpdu is passed as the keyidx value for diff --git a/kernel/exit.c b/kernel/exit.c index 531aadc..d1e8d500 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -807,8 +807,6 @@ fastcall NORET_TYPE void do_exit(long code) panic("Attempted to kill the idle task!"); if (unlikely(tsk->pid == 1)) panic("Attempted to kill init!"); - if (tsk->io_context) - exit_io_context(); if (unlikely(current->ptrace & PT_TRACE_EXIT)) { current->ptrace_message = code; @@ -822,6 +820,8 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(tsk->flags & PF_EXITING)) { printk(KERN_ALERT "Fixing recursive fault but reboot is needed!\n"); + if (tsk->io_context) + exit_io_context(); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } @@ -881,6 +881,9 @@ fastcall NORET_TYPE void do_exit(long code) */ mutex_debug_check_no_locks_held(tsk); + if (tsk->io_context) + exit_io_context(); + /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); BUG_ON(tsk->flags & PF_DEAD); diff --git a/net/Kconfig b/net/Kconfig index 5126f58..4193cdc 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -224,6 +224,9 @@ source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/ieee80211/Kconfig" +config WIRELESS_EXT + bool + endif # if NET endmenu # Networking diff --git a/net/core/Makefile b/net/core/Makefile index 630da0f..79fe12c 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -14,5 +14,5 @@ obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o -obj-$(CONFIG_NET_RADIO) += wireless.o +obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o diff --git a/net/core/dev.c b/net/core/dev.c index 2afb0de..ef56c03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -110,10 +110,8 @@ #include <linux/netpoll.h> #include <linux/rcupdate.h> #include <linux/delay.h> -#ifdef CONFIG_NET_RADIO -#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ +#include <linux/wireless.h> #include <net/iw_handler.h> -#endif /* CONFIG_NET_RADIO */ #include <asm/current.h> /* @@ -1448,8 +1446,29 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) + if (dev->master) { + /* + * On bonding slaves other than the currently active + * slave, suppress duplicates except for 802.3ad + * ETH_P_SLOW and alb non-mcast/bcast. + */ + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { + if (dev->master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + goto keep; + } + + if (dev->master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __constant_htons(ETH_P_SLOW)) + goto keep; + + kfree_skb(skb); + return NULL; + } +keep: skb->dev = dev->master; + } return dev; } @@ -1593,6 +1612,9 @@ int netif_receive_skb(struct sk_buff *skb) orig_dev = skb_bond(skb); + if (!orig_dev) + return NET_RX_DROP; + __get_cpu_var(netdev_rx_stat).total++; skb->h.raw = skb->nh.raw = skb->data; @@ -2028,7 +2050,7 @@ static struct file_operations softnet_seq_fops = { .release = seq_release, }; -#ifdef WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT extern int wireless_proc_init(void); #else #define wireless_proc_init() 0 @@ -2582,7 +2604,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) ret = -EFAULT; return ret; } -#ifdef WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { /* If command is `set a parameter', or @@ -2603,7 +2625,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) ret = -EFAULT; return ret; } -#endif /* WIRELESS_EXT */ +#endif /* CONFIG_WIRELESS_EXT */ return -EINVAL; } } diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c index ecc9bb1..cb71d79 100644 --- a/net/ieee80211/ieee80211_crypt.c +++ b/net/ieee80211/ieee80211_crypt.c @@ -18,7 +18,6 @@ #include <linux/string.h> #include <net/ieee80211.h> - MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("HostAP crypto"); MODULE_LICENSE("GPL"); @@ -33,11 +32,11 @@ static DEFINE_SPINLOCK(ieee80211_crypto_lock); void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) { - struct ieee80211_crypt_data *entry, *next; + struct ieee80211_crypt_data *entry, *next; unsigned long flags; spin_lock_irqsave(&ieee->lock, flags); - list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) { + list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) { if (atomic_read(&entry->refcnt) != 0 && !force) continue; @@ -141,9 +140,9 @@ int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); return -EINVAL; - found: + found: printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " - "'%s'\n", ops->name); + "'%s'\n", ops->name); list_del(&alg->list); spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); kfree(alg); @@ -163,7 +162,7 @@ struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); return NULL; - found: + found: spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); return alg->ops; } diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 3840d19..78b2d13 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -190,7 +190,8 @@ static void ccmp_init_blocks(struct crypto_tfm *tfm, ieee80211_ccmp_aes_encrypt(tfm, b0, s0); } -static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, void *priv) +static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, + u8 *aeskey, int keylen, void *priv) { struct ieee80211_ccmp_data *key = priv; int i; @@ -199,6 +200,9 @@ static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, void *priv) if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) return -1; + if (aeskey != NULL && keylen >= CCMP_TK_LEN) + memcpy(aeskey, key->key, CCMP_TK_LEN); + pos = skb_push(skb, CCMP_HDR_LEN); memmove(pos, pos + CCMP_HDR_LEN, hdr_len); pos += hdr_len; @@ -238,7 +242,7 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; data_len = skb->len - hdr_len; - len = ieee80211_ccmp_hdr(skb, hdr_len, priv); + len = ieee80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); if (len < 0) return -1; diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index e098832..93def94 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -80,10 +80,9 @@ static void *ieee80211_tkip_init(int key_idx) { struct ieee80211_tkip_data *priv; - priv = kmalloc(sizeof(*priv), GFP_ATOMIC); + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) goto fail; - memset(priv, 0, sizeof(*priv)); priv->key_idx = key_idx; @@ -271,34 +270,33 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, #endif } -static u8 *ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, void *priv) +static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, + u8 * rc4key, int keylen, void *priv) { struct ieee80211_tkip_data *tkey = priv; int len; - u8 *rc4key, *pos, *icv; + u8 *pos; struct ieee80211_hdr_4addr *hdr; - u32 crc; hdr = (struct ieee80211_hdr_4addr *)skb->data; if (skb_headroom(skb) < 8 || skb->len < hdr_len) - return NULL; + return -1; + + if (rc4key == NULL || keylen < 16) + return -1; if (!tkey->tx_phase1_done) { tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, tkey->tx_iv32); tkey->tx_phase1_done = 1; } - rc4key = kmalloc(16, GFP_ATOMIC); - if (!rc4key) - return NULL; tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); len = skb->len - hdr_len; pos = skb_push(skb, 8); memmove(pos, pos + 8, hdr_len); pos += hdr_len; - icv = skb_put(skb, 4); *pos++ = *rc4key; *pos++ = *(rc4key + 1); @@ -309,28 +307,28 @@ static u8 *ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, void *priv) *pos++ = (tkey->tx_iv32 >> 16) & 0xff; *pos++ = (tkey->tx_iv32 >> 24) & 0xff; - crc = ~crc32_le(~0, pos, len); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; + tkey->tx_iv16++; + if (tkey->tx_iv16 == 0) { + tkey->tx_phase1_done = 0; + tkey->tx_iv32++; + } - return rc4key; + return 8; } static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; int len; - const u8 *rc4key; - u8 *pos; + u8 rc4key[16], *pos, *icv; + u32 crc; struct scatterlist sg; if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { struct ieee80211_hdr_4addr *hdr = (struct ieee80211_hdr_4addr *)skb->data; - printk(KERN_DEBUG "TKIP countermeasures: dropped " + printk(KERN_DEBUG ": TKIP countermeasures: dropped " "TX packet to " MAC_FMT "\n", MAC_ARG(hdr->addr1)); } @@ -343,22 +341,23 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) len = skb->len - hdr_len; pos = skb->data + hdr_len; - rc4key = ieee80211_tkip_hdr(skb, hdr_len, priv); - if (!rc4key) + if ((ieee80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) return -1; + icv = skb_put(skb, 4); + + crc = ~crc32_le(~0, pos, len); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = len + 4; crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4); - tkey->tx_iv16++; - if (tkey->tx_iv16 == 0) { - tkey->tx_phase1_done = 0; - tkey->tx_iv32++; - } - return 0; } @@ -379,7 +378,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { - printk(KERN_DEBUG "TKIP countermeasures: dropped " + printk(KERN_DEBUG ": TKIP countermeasures: dropped " "received packet from " MAC_FMT "\n", MAC_ARG(hdr->addr2)); } @@ -695,6 +694,7 @@ static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { .name = "TKIP", .init = ieee80211_tkip_init, .deinit = ieee80211_tkip_deinit, + .build_iv = ieee80211_tkip_hdr, .encrypt_mpdu = ieee80211_tkip_encrypt, .decrypt_mpdu = ieee80211_tkip_decrypt, .encrypt_msdu = ieee80211_michael_mic_add, diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index f8dca31..649e581 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -76,7 +76,8 @@ static void prism2_wep_deinit(void *priv) } /* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ -static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, void *priv) +static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, + u8 *key, int keylen, void *priv) { struct prism2_wep_data *wep = priv; u32 klen, len; @@ -131,7 +132,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; /* add the IV to the frame */ - if (prism2_wep_build_iv(skb, hdr_len, priv)) + if (prism2_wep_build_iv(skb, hdr_len, NULL, 0, priv)) return -1; /* Copy the IV into the first 3 bytes of the key */ diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c index 610cc5c..192243a 100644 --- a/net/ieee80211/ieee80211_geo.c +++ b/net/ieee80211/ieee80211_geo.c @@ -50,7 +50,8 @@ int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) /* Driver needs to initialize the geography map before using * these helper functions */ - BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); + if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) + return 0; if (ieee->freq_band & IEEE80211_24GHZ_BAND) for (i = 0; i < ieee->geo.bg_channels; i++) @@ -58,13 +59,15 @@ int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) * this is a B only channel, we don't see it * as valid. */ if ((ieee->geo.bg[i].channel == channel) && + !(ieee->geo.bg[i].flags & IEEE80211_CH_INVALID) && (!(ieee->mode & IEEE_G) || !(ieee->geo.bg[i].flags & IEEE80211_CH_B_ONLY))) return IEEE80211_24GHZ_BAND; if (ieee->freq_band & IEEE80211_52GHZ_BAND) for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].channel == channel) + if ((ieee->geo.a[i].channel == channel) && + !(ieee->geo.a[i].flags & IEEE80211_CH_INVALID)) return IEEE80211_52GHZ_BAND; return 0; @@ -76,7 +79,8 @@ int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) /* Driver needs to initialize the geography map before using * these helper functions */ - BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); + if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) + return -1; if (ieee->freq_band & IEEE80211_24GHZ_BAND) for (i = 0; i < ieee->geo.bg_channels; i++) @@ -97,7 +101,8 @@ u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) /* Driver needs to initialize the geography map before using * these helper functions */ - BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); + if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) + return 0; freq /= 100000; @@ -133,6 +138,41 @@ const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device *ieee) return &ieee->geo; } +u8 ieee80211_get_channel_flags(struct ieee80211_device * ieee, u8 channel) +{ + int index = ieee80211_channel_to_index(ieee, channel); + + if (index == -1) + return IEEE80211_CH_INVALID; + + if (channel <= IEEE80211_24GHZ_CHANNELS) + return ieee->geo.bg[index].flags; + + return ieee->geo.a[index].flags; +} + +static const struct ieee80211_channel bad_channel = { + .channel = 0, + .flags = IEEE80211_CH_INVALID, + .max_power = 0, +}; + +const struct ieee80211_channel *ieee80211_get_channel(struct ieee80211_device + *ieee, u8 channel) +{ + int index = ieee80211_channel_to_index(ieee, channel); + + if (index == -1) + return &bad_channel; + + if (channel <= IEEE80211_24GHZ_CHANNELS) + return &ieee->geo.bg[index]; + + return &ieee->geo.a[index]; +} + +EXPORT_SYMBOL(ieee80211_get_channel); +EXPORT_SYMBOL(ieee80211_get_channel_flags); EXPORT_SYMBOL(ieee80211_is_valid_channel); EXPORT_SYMBOL(ieee80211_freq_to_channel); EXPORT_SYMBOL(ieee80211_channel_to_index); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 90d18b7..2cb84d8 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -82,10 +82,28 @@ static int ieee80211_networks_allocate(struct ieee80211_device *ieee) return 0; } +void ieee80211_network_reset(struct ieee80211_network *network) +{ + if (!network) + return; + + if (network->ibss_dfs) { + kfree(network->ibss_dfs); + network->ibss_dfs = NULL; + } +} + static inline void ieee80211_networks_free(struct ieee80211_device *ieee) { + int i; + if (!ieee->networks) return; + + for (i = 0; i < MAX_NETWORK_COUNT; i++) + if (ieee->networks[i].ibss_dfs) + kfree(ieee->networks[i].ibss_dfs); + kfree(ieee->networks); ieee->networks = NULL; } @@ -195,7 +213,7 @@ void free_ieee80211(struct net_device *dev) static int debug = 0; u32 ieee80211_debug_level = 0; -struct proc_dir_entry *ieee80211_proc = NULL; +static struct proc_dir_entry *ieee80211_proc = NULL; static int show_debug_level(char *page, char **start, off_t offset, int count, int *eof, void *data) diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 7ac6a71..a7f2a64 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -369,8 +369,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* Put this code here so that we avoid duplicating it in all * Rx paths. - Jean II */ +#ifdef CONFIG_WIRELESS_EXT #ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ -#ifdef CONFIG_NET_RADIO /* If spy monitoring on */ if (ieee->spy_data.spy_number > 0) { struct iw_quality wstats; @@ -397,8 +397,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* Update spy records */ wireless_spy_update(ieee->dev, hdr->addr2, &wstats); } -#endif /* CONFIG_NET_RADIO */ #endif /* IW_WIRELESS_SPY */ +#endif /* CONFIG_WIRELESS_EXT */ #ifdef NOT_YET hostap_update_rx_stats(local->ap, hdr, rx_stats); @@ -574,7 +574,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* skb: hdr + (possibly fragmented) plaintext payload */ // PR: FIXME: hostap has additional conditions in the "if" below: // ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) && - if ((frag != 0 || (fc & IEEE80211_FCTL_MOREFRAGS))) { + if ((frag != 0) || (fc & IEEE80211_FCTL_MOREFRAGS)) { int flen; struct sk_buff *frag_skb = ieee80211_frag_cache_get(ieee, hdr); IEEE80211_DEBUG_FRAG("Rx Fragment received (%u)\n", frag); @@ -754,7 +754,14 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, memset(skb->cb, 0, sizeof(skb->cb)); skb->dev = dev; skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */ - netif_rx(skb); + if (netif_rx(skb) == NET_RX_DROP) { + /* netif_rx always succeeds, but it might drop + * the packet. If it drops the packet, we log that + * in our stats. */ + IEEE80211_DEBUG_DROP + ("RX: netif_rx dropped the packet\n"); + stats->rx_dropped++; + } } rx_exit: @@ -930,6 +937,45 @@ static int ieee80211_parse_qos_info_param_IE(struct ieee80211_info_element return rc; } +#ifdef CONFIG_IEEE80211_DEBUG +#define MFIE_STRING(x) case MFIE_TYPE_ ##x: return #x + +static const char *get_info_element_string(u16 id) +{ + switch (id) { + MFIE_STRING(SSID); + MFIE_STRING(RATES); + MFIE_STRING(FH_SET); + MFIE_STRING(DS_SET); + MFIE_STRING(CF_SET); + MFIE_STRING(TIM); + MFIE_STRING(IBSS_SET); + MFIE_STRING(COUNTRY); + MFIE_STRING(HOP_PARAMS); + MFIE_STRING(HOP_TABLE); + MFIE_STRING(REQUEST); + MFIE_STRING(CHALLENGE); + MFIE_STRING(POWER_CONSTRAINT); + MFIE_STRING(POWER_CAPABILITY); + MFIE_STRING(TPC_REQUEST); + MFIE_STRING(TPC_REPORT); + MFIE_STRING(SUPP_CHANNELS); + MFIE_STRING(CSA); + MFIE_STRING(MEASURE_REQUEST); + MFIE_STRING(MEASURE_REPORT); + MFIE_STRING(QUIET); + MFIE_STRING(IBSS_DFS); + MFIE_STRING(ERP_INFO); + MFIE_STRING(RSN); + MFIE_STRING(RATES_EX); + MFIE_STRING(GENERIC); + MFIE_STRING(QOS_PARAMETER); + default: + return "UNKNOWN"; + } +} +#endif + static int ieee80211_parse_info_param(struct ieee80211_info_element *info_element, u16 length, struct ieee80211_network *network) @@ -1040,7 +1086,9 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element break; case MFIE_TYPE_TIM: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_TIM: ignored\n"); + network->tim.tim_count = info_element->data[0]; + network->tim.tim_period = info_element->data[1]; + IEEE80211_DEBUG_MGMT("MFIE_TYPE_TIM: partially ignored\n"); break; case MFIE_TYPE_ERP_INFO: @@ -1091,10 +1139,49 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element printk(KERN_ERR "QoS Error need to parse QOS_PARAMETER IE\n"); break; + /* 802.11h */ + case MFIE_TYPE_POWER_CONSTRAINT: + network->power_constraint = info_element->data[0]; + network->flags |= NETWORK_HAS_POWER_CONSTRAINT; + break; + + case MFIE_TYPE_CSA: + network->power_constraint = info_element->data[0]; + network->flags |= NETWORK_HAS_CSA; + break; + + case MFIE_TYPE_QUIET: + network->quiet.count = info_element->data[0]; + network->quiet.period = info_element->data[1]; + network->quiet.duration = info_element->data[2]; + network->quiet.offset = info_element->data[3]; + network->flags |= NETWORK_HAS_QUIET; + break; + + case MFIE_TYPE_IBSS_DFS: + if (network->ibss_dfs) + break; + network->ibss_dfs = + kmalloc(info_element->len, GFP_ATOMIC); + if (!network->ibss_dfs) + return 1; + memcpy(network->ibss_dfs, info_element->data, + info_element->len); + network->flags |= NETWORK_HAS_IBSS_DFS; + break; + + case MFIE_TYPE_TPC_REPORT: + network->tpc_report.transmit_power = + info_element->data[0]; + network->tpc_report.link_margin = info_element->data[1]; + network->flags |= NETWORK_HAS_TPC_REPORT; + break; default: - IEEE80211_DEBUG_MGMT("unsupported IE %d\n", - info_element->id); + IEEE80211_DEBUG_MGMT + ("Unsupported info element: %s (%d)\n", + get_info_element_string(info_element->id), + info_element->id); break; } @@ -1110,7 +1197,9 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct ieee80211_assoc_response *frame, struct ieee80211_rx_stats *stats) { - struct ieee80211_network network_resp; + struct ieee80211_network network_resp = { + .ibss_dfs = NULL, + }; struct ieee80211_network *network = &network_resp; struct net_device *dev = ieee->dev; @@ -1253,7 +1342,22 @@ static void update_network(struct ieee80211_network *dst, int qos_active; u8 old_param; - memcpy(&dst->stats, &src->stats, sizeof(struct ieee80211_rx_stats)); + ieee80211_network_reset(dst); + dst->ibss_dfs = src->ibss_dfs; + + /* We only update the statistics if they were created by receiving + * the network information on the actual channel the network is on. + * + * This keeps beacons received on neighbor channels from bringing + * down the signal level of an AP. */ + if (dst->channel == src->stats.received_channel) + memcpy(&dst->stats, &src->stats, + sizeof(struct ieee80211_rx_stats)); + else + IEEE80211_DEBUG_SCAN("Network " MAC_FMT " info received " + "off channel (%d vs. %d)\n", MAC_ARG(src->bssid), + dst->channel, src->stats.received_channel); + dst->capability = src->capability; memcpy(dst->rates, src->rates, src->rates_len); dst->rates_len = src->rates_len; @@ -1269,6 +1373,7 @@ static void update_network(struct ieee80211_network *dst, dst->listen_interval = src->listen_interval; dst->atim_window = src->atim_window; dst->erp_value = src->erp_value; + dst->tim = src->tim; memcpy(dst->wpa_ie, src->wpa_ie, src->wpa_ie_len); dst->wpa_ie_len = src->wpa_ie_len; @@ -1313,7 +1418,9 @@ static void ieee80211_process_probe_response(struct ieee80211_device *stats) { struct net_device *dev = ieee->dev; - struct ieee80211_network network; + struct ieee80211_network network = { + .ibss_dfs = NULL, + }; struct ieee80211_network *target; struct ieee80211_network *oldest = NULL; #ifdef CONFIG_IEEE80211_DEBUG @@ -1386,6 +1493,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device escape_essid(target->ssid, target->ssid_len), MAC_ARG(target->bssid)); + ieee80211_network_reset(target); } else { /* Otherwise just pull from the free list */ target = list_entry(ieee->network_free_list.next, @@ -1402,6 +1510,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device "BEACON" : "PROBE RESPONSE"); #endif memcpy(target, &network, sizeof(*target)); + network.ibss_dfs = NULL; list_add_tail(&target->list, &ieee->network_list); } else { IEEE80211_DEBUG_SCAN("Updating '%s' (" MAC_FMT ") via %s.\n", @@ -1411,6 +1520,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); update_network(target, &network); + network.ibss_dfs = NULL; } spin_unlock_irqrestore(&ieee->lock, flags); @@ -1495,10 +1605,43 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, header); break; + case IEEE80211_STYPE_ACTION: + IEEE80211_DEBUG_MGMT("ACTION\n"); + if (ieee->handle_action) + ieee->handle_action(ieee->dev, + (struct ieee80211_action *) + header, stats); + break; + + case IEEE80211_STYPE_REASSOC_REQ: + IEEE80211_DEBUG_MGMT("received reassoc (%d)\n", + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); + + IEEE80211_WARNING("%s: IEEE80211_REASSOC_REQ received\n", + ieee->dev->name); + if (ieee->handle_reassoc_request != NULL) + ieee->handle_reassoc_request(ieee->dev, + (struct ieee80211_reassoc_request *) + header); + break; + + case IEEE80211_STYPE_ASSOC_REQ: + IEEE80211_DEBUG_MGMT("received assoc (%d)\n", + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); + + IEEE80211_WARNING("%s: IEEE80211_ASSOC_REQ received\n", + ieee->dev->name); + if (ieee->handle_assoc_request != NULL) + ieee->handle_assoc_request(ieee->dev); + break; + case IEEE80211_STYPE_DEAUTH: - printk("DEAUTH from AP\n"); + IEEE80211_DEBUG_MGMT("DEAUTH\n"); if (ieee->handle_deauth != NULL) - ieee->handle_deauth(ieee->dev, (struct ieee80211_auth *) + ieee->handle_deauth(ieee->dev, + (struct ieee80211_deauth *) header); break; default: diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index 8fdd943..8b4332f 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -56,7 +56,18 @@ Desc. | ctrl | dura | DA/RA | TA | SA | Sequ | Frame | fcs | `--------------------------------------------------| |------' Total: 28 non-data bytes `----.----' | - .- 'Frame data' expands to <---------------------------' + .- 'Frame data' expands, if WEP enabled, to <----------' + | + V + ,-----------------------. +Bytes | 4 | 0-2296 | 4 | + |-----|-----------|-----| +Desc. | IV | Encrypted | ICV | + | | Packet | | + `-----| |-----' + `-----.-----' + | + .- 'Encrypted Packet' expands to | V ,---------------------------------------------------. @@ -65,18 +76,7 @@ Bytes | 1 | 1 | 1 | 3 | 2 | 0-2304 | Desc. | SNAP | SNAP | Control |Eth Tunnel| Type | IP | | DSAP | SSAP | | | | Packet | | 0xAA | 0xAA |0x03 (UI)|0x00-00-F8| | | - `-----------------------------------------| | -Total: 8 non-data bytes `----.----' - | - .- 'IP Packet' expands, if WEP enabled, to <--' - | - V - ,-----------------------. -Bytes | 4 | 0-2296 | 4 | - |-----|-----------|-----| -Desc. | IV | Encrypted | ICV | - | | IP Packet | | - `-----------------------' + `---------------------------------------------------- Total: 8 non-data bytes 802.3 Ethernet Data Frame @@ -470,7 +470,9 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) atomic_inc(&crypt->refcnt); if (crypt->ops->build_iv) crypt->ops->build_iv(skb_frag, hdr_len, - crypt->priv); + ieee->sec.keys[ieee->sec.active_key], + ieee->sec.key_sizes[ieee->sec.active_key], + crypt->priv); atomic_dec(&crypt->refcnt); } diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index f87c6b8..af7f9bb 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -149,9 +149,7 @@ static char *ipw2100_translate_scan(struct ieee80211_device *ieee, iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID | IW_QUAL_LEVEL_INVALID; iwe.u.qual.qual = 0; - iwe.u.qual.level = 0; } else { - iwe.u.qual.level = network->stats.rssi; if (ieee->perfect_rssi == ieee->worst_rssi) iwe.u.qual.qual = 100; else @@ -179,6 +177,13 @@ static char *ipw2100_translate_scan(struct ieee80211_device *ieee, iwe.u.qual.noise = network->stats.noise; } + if (!(network->stats.mask & IEEE80211_STATMASK_SIGNAL)) { + iwe.u.qual.updated |= IW_QUAL_LEVEL_INVALID; + iwe.u.qual.level = 0; + } else { + iwe.u.qual.level = network->stats.signal; + } + start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN); iwe.cmd = IWEVCUSTOM; @@ -188,33 +193,21 @@ static char *ipw2100_translate_scan(struct ieee80211_device *ieee, if (iwe.u.data.length) start = iwe_stream_add_point(start, stop, &iwe, custom); + memset(&iwe, 0, sizeof(iwe)); if (network->wpa_ie_len) { - char buf[MAX_WPA_IE_LEN * 2 + 30]; - - u8 *p = buf; - p += sprintf(p, "wpa_ie="); - for (i = 0; i < network->wpa_ie_len; i++) { - p += sprintf(p, "%02x", network->wpa_ie[i]); - } - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - iwe.u.data.length = strlen(buf); + char buf[MAX_WPA_IE_LEN]; + memcpy(buf, network->wpa_ie, network->wpa_ie_len); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = network->wpa_ie_len; start = iwe_stream_add_point(start, stop, &iwe, buf); } + memset(&iwe, 0, sizeof(iwe)); if (network->rsn_ie_len) { - char buf[MAX_WPA_IE_LEN * 2 + 30]; - - u8 *p = buf; - p += sprintf(p, "rsn_ie="); - for (i = 0; i < network->rsn_ie_len; i++) { - p += sprintf(p, "%02x", network->rsn_ie[i]); - } - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - iwe.u.data.length = strlen(buf); + char buf[MAX_WPA_IE_LEN]; + memcpy(buf, network->rsn_ie, network->rsn_ie_len); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = network->rsn_ie_len; start = iwe_stream_add_point(start, stop, &iwe, buf); } @@ -229,6 +222,28 @@ static char *ipw2100_translate_scan(struct ieee80211_device *ieee, if (iwe.u.data.length) start = iwe_stream_add_point(start, stop, &iwe, custom); + /* Add spectrum management information */ + iwe.cmd = -1; + p = custom; + p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Channel flags: "); + + if (ieee80211_get_channel_flags(ieee, network->channel) & + IEEE80211_CH_INVALID) { + iwe.cmd = IWEVCUSTOM; + p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "INVALID "); + } + + if (ieee80211_get_channel_flags(ieee, network->channel) & + IEEE80211_CH_RADAR_DETECT) { + iwe.cmd = IWEVCUSTOM; + p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "DFS "); + } + + if (iwe.cmd == IWEVCUSTOM) { + iwe.u.data.length = p - custom; + start = iwe_stream_add_point(start, stop, &iwe, custom); + } + return start; } @@ -734,9 +749,98 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, return 0; } +int ieee80211_wx_set_auth(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct ieee80211_device *ieee = netdev_priv(dev); + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&ieee->lock, flags); + + switch (wrqu->param.flags & IW_AUTH_INDEX) { + case IW_AUTH_WPA_VERSION: + case IW_AUTH_CIPHER_PAIRWISE: + case IW_AUTH_CIPHER_GROUP: + case IW_AUTH_KEY_MGMT: + /* + * Host AP driver does not use these parameters and allows + * wpa_supplicant to control them internally. + */ + break; + case IW_AUTH_TKIP_COUNTERMEASURES: + break; /* FIXME */ + case IW_AUTH_DROP_UNENCRYPTED: + ieee->drop_unencrypted = !!wrqu->param.value; + break; + case IW_AUTH_80211_AUTH_ALG: + break; /* FIXME */ + case IW_AUTH_WPA_ENABLED: + ieee->privacy_invoked = ieee->wpa_enabled = !!wrqu->param.value; + break; + case IW_AUTH_RX_UNENCRYPTED_EAPOL: + ieee->ieee802_1x = !!wrqu->param.value; + break; + case IW_AUTH_PRIVACY_INVOKED: + ieee->privacy_invoked = !!wrqu->param.value; + break; + default: + err = -EOPNOTSUPP; + break; + } + spin_unlock_irqrestore(&ieee->lock, flags); + return err; +} + +int ieee80211_wx_get_auth(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct ieee80211_device *ieee = netdev_priv(dev); + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&ieee->lock, flags); + + switch (wrqu->param.flags & IW_AUTH_INDEX) { + case IW_AUTH_WPA_VERSION: + case IW_AUTH_CIPHER_PAIRWISE: + case IW_AUTH_CIPHER_GROUP: + case IW_AUTH_KEY_MGMT: + case IW_AUTH_TKIP_COUNTERMEASURES: /* FIXME */ + case IW_AUTH_80211_AUTH_ALG: /* FIXME */ + /* + * Host AP driver does not use these parameters and allows + * wpa_supplicant to control them internally. + */ + err = -EOPNOTSUPP; + break; + case IW_AUTH_DROP_UNENCRYPTED: + wrqu->param.value = ieee->drop_unencrypted; + break; + case IW_AUTH_WPA_ENABLED: + wrqu->param.value = ieee->wpa_enabled; + break; + case IW_AUTH_RX_UNENCRYPTED_EAPOL: + wrqu->param.value = ieee->ieee802_1x; + break; + default: + err = -EOPNOTSUPP; + break; + } + spin_unlock_irqrestore(&ieee->lock, flags); + return err; +} + EXPORT_SYMBOL(ieee80211_wx_set_encodeext); EXPORT_SYMBOL(ieee80211_wx_get_encodeext); EXPORT_SYMBOL(ieee80211_wx_get_scan); EXPORT_SYMBOL(ieee80211_wx_set_encode); EXPORT_SYMBOL(ieee80211_wx_get_encode); + +EXPORT_SYMBOL_GPL(ieee80211_wx_set_auth); +EXPORT_SYMBOL_GPL(ieee80211_wx_get_auth); diff --git a/net/socket.c b/net/socket.c index a00851f..7e1bdef 100644 --- a/net/socket.c +++ b/net/socket.c @@ -84,10 +84,7 @@ #include <linux/compat.h> #include <linux/kmod.h> #include <linux/audit.h> - -#ifdef CONFIG_NET_RADIO -#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ -#endif /* CONFIG_NET_RADIO */ +#include <linux/wireless.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -840,11 +837,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(cmd, argp); } else -#ifdef WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(cmd, argp); } else -#endif /* WIRELESS_EXT */ +#endif /* CONFIG_WIRELESS_EXT */ switch (cmd) { case FIOSETOWN: case SIOCSPGRP: |