一、安装ubuntu
公司发的电脑,Thinkpad X13,8核16G,通过vm搭建一个ubuntu 18.04的linux环境,8核16G+100G存储。
环境如下
root@ubuntu:~/kubeflow-manifests# uname -a
Linux ubuntu 4.15.0-180-generic #189-Ubuntu SMP Wed May 18 14:13:57 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
开启root ssh
sudo passwd root
vi /etc/ssh/sshd_config # 将 PermitRootLogin 设置为 yes
/etc/init.d/ssh restart
虚拟机配置如下
查看物理CPU个数
查看每个物理CPU中core的个数(即核数)
查看逻辑CPU的个数
查看线程数
root@ubuntu:~# cat /proc/cpuinfo| grep "physical id"| sort| uniq| wc -l
4
root@ubuntu:~# cat /proc/cpuinfo| grep "cpu cores"| uniq
cpu cores : 2
root@ubuntu:~# cat /proc/cpuinfo| grep "processor"| wc -l
8
root@ubuntu:~# grep 'processor' /proc/cpuinfo | sort -u | wc -l
8
内存
root@ubuntu:~# free -g
              total        used        free      shared  buff/cache   available
Mem:             15           0          14           0           0          15
Swap:             0           0           0
存储
root@ubuntu:~# df -h
Filesystem                         Size  Used Avail Use% Mounted on
udev                               7.8G     0  7.8G   0% /dev
tmpfs                              1.6G  1.3M  1.6G   1% /run
/dev/mapper/ubuntu--vg-ubuntu--lv   97G  6.3G   86G   7% /
tmpfs                              7.9G     0  7.9G   0% /dev/shm
tmpfs                              5.0M     0  5.0M   0% /run/lock
tmpfs                              7.9G     0  7.9G   0% /sys/fs/cgroup
/dev/sda2                          976M   80M  830M   9% /boot
tmpfs                              1.6G     0  1.6G   0% /run/user/0
二、安装docker 【k8s学习总结|穷人安装ubuntu1.18.4+docker+kind+k8s-1.19.1+kubeflow-1.3】按照之前的虚拟机安装文档,搭建到了时间同步,docker安装
root@ubuntu:~# docker info
Client:
Context:default
Debug Mode: false
Plugins:
app: Docker App (Docker Inc., v0.9.1-beta3)
buildx: Docker Buildx (Docker Inc., v0.8.2-docker)
scan: Docker Scan (Docker Inc., v0.17.0)
Server:
Containers: 0
Running: 0
Paused: 0
Stopped: 0
Images: 0
Server Version: 20.10.16
Storage Driver: overlay2
Backing Filesystem: extfs
Supports d_type: true
Native Overlay Diff: true
userxattr: false
Logging Driver: json-file
Cgroup Driver: systemd
Cgroup Version: 1
Plugins:
Volume: local
Network: bridge host ipvlan macvlan null overlay
Log: awslogs fluentd gcplogs gelf journald json-file local logentries splunk syslog
Swarm: inactive
Runtimes: io.containerd.runc.v2 io.containerd.runtime.v1.linux runc
Default Runtime: runc
Init Binary: docker-init
containerd version: 212e8b6fa2f44b9c21b2798135fc6fb7c53efc16
runc version: v1.1.1-0-g52de29d
init version: de40ad0
Security Options:
apparmor
seccomp
Profile: default
Kernel Version: 4.15.0-180-generic
Operating System: Ubuntu 18.04.6 LTS
OSType: linux
Architecture: x86_64
CPUs: 8
Total Memory: 15.64GiB
Name: ubuntu
ID: QOFG:2C6N:RLFE:N4CR:TCYZ:WYS6:YHMC:BNX5:47LP:7M6L:LTBK:BCBT
Docker Root Dir: /var/lib/docker
Debug Mode: false
Registry: https://index.docker.io/v1/
Labels:
Experimental: false
Insecure Registries:
127.0.0.0/8
Registry Mirrors:
https://fzy3wxn0.mirror.aliyuncs.com/
Live Restore Enabled: false
WARNING: No swap limit support
三、安装kind
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.11.1/kind-linux-amd64
chmod +x ./kind
mv kind /usr/bin/
which kind
设置kind命令自动补全
source /usr/share/bash-completion/bash_completion
source <(kubectl completion bash)
echo 'source <(kind completion bash)' >>~/.bashrc
四、安装k8s 去docker hub 下载镜像,go
docker pull kindest/node:v1.19.1
下载kubeflow安装文件
git clone https://github.com/shikanon/kubeflow-manifests.git
安装k8s
cd kubeflow-manifests
kind create cluster --config=kind/kind-config.yaml --name=kubeflow --image=kindest/node:v1.19.1
安装kubectl
为了能够查看pods,还要安装kubectl工具
- 更新源
# 使得 apt 支持 ssl 传输
apt-get update && apt-get install -y apt-transport-https
curl https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -
cat <<EOF >/etc/apt/sources.list.d/kubernetes.list
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF
apt-get update
- 查看kubectl可用版本
apt-cache madison kubectl kubeadm kubelet
- 安装
apt-get install kubectl=1.19.1-00
结果
root@ubuntu:~/kubeflow-manifests# kubectl get pods -A
NAMESPACE   NAME   READY   STATUS   RESTARTS   AGE
kube-system   coredns-74ff55c5b-2fxs8   1/1   Running   0   23m
kube-system   coredns-74ff55c5b-bp7c2   1/1   Running   0   23m
kube-system   etcd-kubeflow-control-plane   1/1   Running   0   23m
kube-system   kindnet-hhbgz   1/1   Running   0   23m
kube-system   kube-apiserver-kubeflow-control-plane   1/1   Running   0   23m
kube-system   kube-controller-manager-kubeflow-control-plane   1/1   Running   0   23m
kube-system   kube-proxy-7jqgd   1/1   Running   0   23m
kube-system   kube-scheduler-kubeflow-control-plane   1/1   Running   0   23m
local-path-storage   local-path-provisioner-78776bfc44-bx4ws   1/1   Running   0   23m
注意
kind 会创建存储类,如果是k8s cluster 模式,需要先搭建存储类storageclass,没有的提前创建一下。
kubectl create -f localpath/
五、安装kubeflow
python3 install.py
首次安装时 017-***.yaml 文件会报错(从报错信息看,此时集群还不能识别 CompositeController 这个资源类型):
error: unable to recognize "./manifest1.3/017-pipeline-env-platform-agnostic-multi-user.yaml": no matches for kind "CompositeController" in version "metacontroller.k8s.io/v1alpha1"
重新apply一下
kubectl apply -f manifest1.3/017-pipeline-env-platform-agnostic-multi-user.yaml
kubectl apply -f patch/
然后等待就可以了,一共用了一个小时
结果
root@ubuntu:~/kubeflow-manifests# kubectl get pods -A
NAMESPACE   NAME   READY   STATUS   RESTARTS   AGE
auth   dex-6d8cd4fccb-ktjdl   1/1   Running   0   27m
cert-manager   cert-manager-649f8dfd4b-89cwl   1/1   Running   0   33m
cert-manager   cert-manager-cainjector-75cd8bbf6d-wzmbb   1/1   Running   0   33m
cert-manager   cert-manager-webhook-5b5cd9bd6f-4j4mn   1/1   Running   1   33m
istio-system   authservice-0   1/1   Running   0   32m
istio-system   cluster-local-gateway-74d9fd9586-wpc7p   1/1   Running   0   27m
istio-system   istio-ingressgateway-8bf685655-8bf5z   1/1   Running   0   27m
istio-system   istiod-756554b96b-hf697   1/1   Running   0   27m
knative-eventing   broker-controller-cfb5ccb77-57nqr   1/1   Running   0   31m
knative-eventing   eventing-controller-8657cd4b8-xw5qq   1/1   Running   0   31m
knative-eventing   eventing-webhook-67f86f4d4d-grm27   1/1   Running   0   31m
knative-eventing   imc-controller-68bd666784-927rr   1/1   Running   0   31m
knative-eventing   imc-dispatcher-78ff9dd847-c4kjs   1/1   Running   0   31m
knative-serving   activator-54b777546f-zjdlq   1/1   Running   1   31m
knative-serving   autoscaler-79bbc84d47-g8dp4   1/1   Running   0   31m
knative-serving   controller-dd65cb4b7-82ngq   1/1   Running   0   31m
knative-serving   istio-webhook-5f545fc44b-bgthr   1/1   Running   0   31m
knative-serving   networking-istio-6b6df495d6-wdfwq   1/1   Running   0   31m
knative-serving   webhook-9ff656f95-qp89s   1/1   Running   0   31m
kube-system   coredns-74ff55c5b-2fxs8   1/1   Running   0   58m
kube-system   coredns-74ff55c5b-bp7c2   1/1   Running   0   58m
kube-system   etcd-kubeflow-control-plane   1/1   Running   0   58m
kube-system   kindnet-hhbgz   1/1   Running   0   58m
kube-system   kube-apiserver-kubeflow-control-plane   1/1   Running   0   58m
kube-system   kube-controller-manager-kubeflow-control-plane   1/1   Running   0   58m
kube-system   kube-proxy-7jqgd   1/1   Running   0   58m
kube-system   kube-scheduler-kubeflow-control-plane   1/1   Running   0   58m
kubeflow-user-example-com   ml-pipeline-ui-artifact-6b9bb7f495-8s9kh   2/2   Running   0   4m34s
kubeflow-user-example-com   ml-pipeline-visualizationserver-5c648f8448-7hlmt   2/2   Running   0   4m34s
kubeflow   admission-webhook-deployment-5f5cc7968b-rzbsz   1/1   Running   0   29m
kubeflow   cache-deployer-deployment-64598b6c87-7crx4   2/2   Running   1   30m
kubeflow   cache-server-59d67c7584-r7wkb   2/2   Running   0   27m
kubeflow   centraldashboard-7b6b6cc7fc-tk7mj   1/1   Running   0   29m
kubeflow   jupyter-web-app-deployment-7c6974bb88-2bz7z   1/1   Running   0   27m
kubeflow   katib-controller-7b784c44dd-lbsgm   1/1   Running   0   30m
kubeflow   katib-db-manager-6c5757dc64-zdpdd   1/1   Running   3   30m
kubeflow   katib-mysql-79d75c7444-fjzwq   1/1   Running   0   30m
kubeflow   katib-ui-69f5b6795d-b7lx7   1/1   Running   0   30m
kubeflow   kfserving-controller-manager-0   2/2   Running   0   30m
kubeflow   kubeflow-pipelines-profile-controller-7699846fd7-sfw2d   1/1   Running   0   27m
kubeflow   metacontroller-0   1/1   Running   0   30m
kubeflow   metadata-envoy-deployment-56f745f7fb-j66bw   1/1   Running   0   30m
kubeflow   metadata-grpc-deployment-6494577fdb-d57jf   2/2   Running   3   30m
kubeflow   metadata-writer-b7ff9787-5fngh   2/2   Running   1   30m
kubeflow   minio-cc8f7c6d-khbxw   2/2   Running   0   25m
kubeflow   ml-pipeline-66bcb9d79d-jnfnc   2/2   Running   5   30m
kubeflow   ml-pipeline-persistenceagent-7fb8f6dc68-nlhsp   2/2   Running   1   30m
kubeflow   ml-pipeline-scheduledworkflow-64bcfd6596-8ncr7   2/2   Running   0   30m
kubeflow   ml-pipeline-ui-8578f6685f-d4llg   2/2   Running   0   30m
kubeflow   ml-pipeline-viewer-crd-565fb9b5c5-nlbdv   2/2   Running   1   30m
kubeflow   ml-pipeline-visualizationserver-b7c7d49fb-h72g7   2/2   Running   0   30m
kubeflow   mpi-operator-794849c566-jksfc   1/1   Running   0   28m
kubeflow   mxnet-operator-6668d797d4-72f2d   1/1   Running   0   27m
kubeflow   mysql-9dfc684cd-4dcs9   2/2   Running   0   30m
kubeflow   notebook-controller-deployment-6795dd887b-w5dnn   1/1   Running   0   29m
kubeflow   profiles-deployment-84bd4f9bc7-558bw   2/2   Running   0   29m
kubeflow   pytorch-operator-6887749499-t2b5f   2/2   Running   0   28m
kubeflow   tensorboard-controller-controller-manager-dd896c8df-9gqc5   3/3   Running   1   28m
kubeflow   tensorboards-web-app-deployment-5969cd5b68-btxk4   1/1   Running   0   27m
kubeflow   tf-job-operator-ccb48b77b-ptppw   1/1   Running   0   28m
kubeflow   volumes-web-app-deployment-867dfb5b5c-qtfkz   1/1   Running   0   27m
kubeflow   workflow-controller-6885c56f65-c45qw   2/2   Running   1   27m
kubeflow   xgboost-operator-deployment-665cf9bf8d-5kzrd   2/2   Running   1   27m
local-path-storage   local-path-provisioner-78776bfc44-bx4ws   1/1   Running   0   58m
用户名是admin@example.com,密码是password
文章图片
六、卸载命令
kind delete cluster --name kubeflow # 卸载集群
apt-get --purge remove kubectl # 卸载软件包
apt-get autoremove kubectl # 卸载相关依赖
七、追根溯源 在kubeflow官网,有很多的repo,找到manifests文件
todo
—参考— 1. kind安装k8s集群
2. 玩转Kubeflow第一章: kubeflow 国内本地安装及案例介绍
3. 手把手教你搭建Kubeflow——基于K8s的机器学习平台
其中第 3 篇文章挺好的,会拆解安装的内容。
推荐阅读
- DevOps|How to start the dotnetsdk docker image for test?
- Docker|Docker--本地镜像发布到私有库
- 目录索引|docker使用教程
- Docker 安装部署
- 解决ubuntu无法修改分辨率为1920*1080问题
- 机器学习|AI圈真魔幻!谷歌最新研究表明卷积在NLP预训练上竟优于Transformer(LeCun暧昧表态...)
- Docker部署安装禅道
- Scikit-Learn集成机器学习(boosting(4万字详解,收藏))
- docker-Consul概述以及集群环境搭建