1. System requirements:
hostname: openfiler01
eth0: 192.168.1.155
eth1: 10.10.5.155
500MB Meta partition
4GB+ Data partition
hostname: openfiler02
eth0: 192.168.1.156
eth1: 10.10.5.156
500MB Meta partition
4GB+ Data partition
virtual IP: 192.168.1.157 (do not assign it to any adapter; it will be created later by Corosync)
1.1 Add both hosts to /etc/hosts (run on both openfiler01 and openfiler02)
192.168.1.155 openfiler01
192.168.1.156 openfiler02
1.2 Set up SSH key authentication
root@openfiler01 ~# ssh-keygen -t dsa
Generating public/private dsa key pair.
Enter file in which to save the key (/root/.ssh/id_dsa):
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_dsa.
Your public key has been saved in /root/.ssh/id_dsa.pub.
The key fingerprint is:
Do the same on openfiler02.
root@openfiler02 ~# ssh-keygen -t dsa
Then exchange the public keys:
root@openfiler01 ~# scp ~/.ssh/id_dsa.pub root@openfiler02:~/.ssh/authorized_keys
root@openfiler02 ~# scp ~/.ssh/id_dsa.pub root@openfiler01:~/.ssh/authorized_keys
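To confirm that the key exchange worked, log in from each node to the other once; if no password prompt appears, the later scp commands between the filers will run unattended:
root@openfiler01 ~# ssh root@openfiler02 hostname
root@openfiler02 ~# ssh root@openfiler01 hostname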
2. Create the meta and data partitions on both filers
Target partition types:
sdb1 -> 83 (Linux, meta partition)
sdb2 -> 8e (Linux LVM, data partition)
[root@openfiler01 ~]# fdisk -l
Disk /dev/sda: 17.2 GB, 17179869184 bytes
255 heads, 63 sectors/track, 2088 cylinders, total 33554432 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk identifier: 0x0000425b
Device Boot Start End Blocks Id System
/dev/sda1 * 63 610469 305203+ 83 Linux
/dev/sda2 610470 17382329 8385930 83 Linux
/dev/sda3 17382330 19486844 1052257+ 82 Linux swap / Solaris
Disk /dev/sdb: 21.5 GB, 21474836480 bytes
255 heads, 63 sectors/track, 2610 cylinders, total 41943040 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk identifier: 0x000ab4f7
Device Boot Start End Blocks Id System
[root@openfiler01 ~]# fdisk /dev/sdb
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4, default 1):
Using default value 1
First sector (2048-41943039, default 2048):
Using default value 2048
Last sector, +sectors or +size{K,M,G} (2048-41943039, default 41943039): +500M
Command (m for help): t
Selected partition 1
Hex code (type L to list codes): 83
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4, default 2):
Using default value 2
First sector (1026048-41943039, default 1026048):
Using default value 1026048
Last sector, +sectors or +size{K,M,G} (1026048-41943039, default 41943039):
Using default value 41943039
Command (m for help): t
Partition number (1-4): 2
Hex code (type L to list codes): 8e
Changed system type of partition 2 to 8e (Linux LVM)
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
[root@openfiler01 ~]# partprobe
[root@openfiler01 ~]# fdisk -l
Disk /dev/sda: 17.2 GB, 17179869184 bytes
255 heads, 63 sectors/track, 2088 cylinders, total 33554432 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk identifier: 0x0000425b
Device Boot Start End Blocks Id System
/dev/sda1 * 63 610469 305203+ 83 Linux
/dev/sda2 610470 17382329 8385930 83 Linux
/dev/sda3 17382330 19486844 1052257+ 82 Linux swap / Solaris
Disk /dev/sdb: 21.5 GB, 21474836480 bytes
255 heads, 63 sectors/track, 2610 cylinders, total 41943040 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk identifier: 0x000ab4f7
Device Boot Start End Blocks Id System
/dev/sdb1 2048 1026047 512000 83 Linux
/dev/sdb2 1026048 41943039 20458496 8e Linux LVM
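The same sdb1/sdb2 layout must also exist on openfiler02. You can simply repeat the fdisk dialogue there; as a possible shortcut (an assumption here, provided /dev/sdb on openfiler02 is the same size and still empty), the partition table can be copied over the SSH trust set up earlier:
[root@openfiler01 ~]# sfdisk -d /dev/sdb | ssh root@openfiler02 "sfdisk /dev/sdb"
[root@openfiler01 ~]# ssh root@openfiler02 partprobe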
2.1 Set up DRBD
Edit /etc/drbd.conf (the main DRBD configuration file) on openfiler01 and openfiler02:
[root@openfiler02 ~]# cat /etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example
include "drbd.d/global_common.conf";
include "drbd.d/*.res";
resource meta {
on openfiler01 {
device /dev/drbd0;
disk /dev/sdb1;
address 10.10.5.155:7788;
meta-disk internal;
}
on openfiler02 {
device /dev/drbd0;
disk /dev/sdb1;
address 10.10.5.156:7788;
meta-disk internal;
}
}
resource data {
on openfiler01 {
device /dev/drbd1;
disk /dev/sdb2;
address 10.10.5.155:7789;
meta-disk internal;
}
on openfiler02 {
device /dev/drbd1;
disk /dev/sdb2;
address 10.10.5.156:7789;
meta-disk internal;
}
}
Then create the DRBD metadata for the meta and data resources with drbdadm. If the first attempt fails with an error, you can zero the device first as shown below and then run create-md again. (Note: these two partitions must not appear in /etc/fstab; they are controlled by DRBD.)
dd if=/dev/zero of=/dev/drbdX
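The X in /dev/drbdX is a placeholder. If create-md refuses to run because it finds an existing filesystem or old metadata signature, one common workaround (an assumption here, not part of the original steps) is to zero only the first megabyte of the affected backing partition, for example for the meta partition:
[root@openfiler01 ~]# dd if=/dev/zero of=/dev/sdb1 bs=1M count=1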
root@openfiler01 ~# drbdadm create-md meta
root@openfiler01 ~# drbdadm create-md data
root@openfiler02 ~# drbdadm create-md meta
root@openfiler02 ~# drbdadm create-md data
Now you can start up drbd with:
service drbd start
on both nodes.
Make one node the primary:
root@openfiler01 ~# drbdsetup /dev/drbd0 primary -o
root@openfiler01 ~# drbdsetup /dev/drbd1 primary -o
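After forcing one side to primary, DRBD begins the initial full synchronization from openfiler01 to openfiler02. You can watch its progress, the Primary/Secondary roles, and the UpToDate/Inconsistent disk states on either node:
root@openfiler01 ~# cat /proc/drbd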
2.2 Create the filesystem
root@openfiler01 ~# mkfs.ext3 /dev/drbd0
2.2.1 Move the Openfiler configuration onto the meta partition
Run the following script on openfiler01:
root@openfiler01 ~# service openfiler stop
============== begin =====================
#!/bin/bash
# Mount the DRBD meta device and move the Openfiler installation onto it
mkdir /meta
mount /dev/drbd0 /meta
mv /opt/openfiler/ /opt/openfiler.local
mkdir /meta/opt
cp -a /opt/openfiler.local /meta/opt/openfiler
ln -s /meta/opt/openfiler /opt/openfiler
rm /meta/opt/openfiler/sbin/openfiler
ln -s /usr/sbin/httpd /meta/opt/openfiler/sbin/openfiler
rm /meta/opt/openfiler/etc/rsync.xml
ln -s /opt/openfiler.local/etc/rsync.xml /meta/opt/openfiler/etc/
mkdir -p /meta/etc/httpd/conf.d
# Stop NFS locking and unmount rpc_pipefs so /var/lib/nfs can be moved
service nfslock stop
umount -a -t rpc-pipefs
# Move the Samba, NFS, iSCSI (IET) and ProFTPD configuration to /meta and
# replace the originals with symlinks so both filers share the same config
mv /etc/samba/ /meta/etc/
ln -s /meta/etc/samba/ /etc/samba
mkdir -p /meta/var/spool
mv /var/spool/samba/ /meta/var/spool/
ln -s /meta/var/spool/samba/ /var/spool/samba
mkdir -p /meta/var/lib
mv /var/lib/nfs/ /meta/var/lib/
ln -s /meta/var/lib/nfs/ /var/lib/nfs
mv /etc/exports /meta/etc/
ln -s /meta/etc/exports /etc/exports
mv /etc/ietd.conf /meta/etc/
ln -s /meta/etc/ietd.conf /etc/ietd.conf
mv /etc/initiators.allow /meta/etc/
ln -s /meta/etc/initiators.allow /etc/initiators.allow
mv /etc/initiators.deny /meta/etc/
ln -s /meta/etc/initiators.deny /etc/initiators.deny
mv /etc/proftpd /meta/etc/
ln -s /meta/etc/proftpd/ /etc/proftpd
# Point Openfiler's Apache at the system httpd modules and start it again
rm /opt/openfiler/etc/httpd/modules
ln -s /usr/lib64/httpd/modules /opt/openfiler/etc/httpd/modules
service openfiler start
============== end =====================
2.2.2 Openfiler configuration on openfiler02
Run the following script on openfiler02:
[root@openfiler02 ~]# cat start.sh
#!/bin/bash
service openfiler stop
mkdir /meta
mv /opt/openfiler/ /opt/openfiler.local
ln -s /meta/opt/openfiler /opt/openfiler
service nfslock stop
umount -a -t rpc-pipefs
rm -rf /etc/samba/
ln -s /meta/etc/samba/ /etc/samba
rm -rf /var/spool/samba/
ln -s /meta/var/spool/samba/ /var/spool/samba
rm -rf /var/lib/nfs/
ln -s /meta/var/lib/nfs/ /var/lib/nfs
rm -rf /etc/exports
ln -s /meta/etc/exports /etc/exports
rm /etc/ietd.conf
ln -s /meta/etc/ietd.conf /etc/ietd.conf
rm /etc/initiators.allow
ln -s /meta/etc/initiators.allow /etc/initiators.allow
rm /etc/initiators.deny
ln -s /meta/etc/initiators.deny /etc/initiators.deny
rm -rf /etc/proftpd
ln -s /meta/etc/proftpd/ /etc/proftpd
2.3 Create the LVM volumes
Change the LVM filter in the /etc/lvm/lvm.conf file from:
filter = [ "a/.*/" ]
to:
filter = [ "a|drbd[0-9]|", "r|.*|" ]
This makes LVM scan only the DRBD devices and ignore the underlying /dev/sdb partitions.
Then copy it to openfiler02:
root@openfiler01 ~# scp /etc/lvm/lvm.conf root@openfiler02:/etc/lvm/lvm.conf
After that we can create the LVM physical volume, volume group, and logical volume:
root@openfiler01 ~# pvcreate /dev/drbd1
root@openfiler01 ~# vgcreate data /dev/drbd1
root@openfiler01 ~# lvcreate -L 400M -n filer data
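To check that the LVM filter is working and that the physical volume, volume group, and logical volume were created on the DRBD device as expected, the standard LVM reporting commands can be used:
root@openfiler01 ~# pvs
root@openfiler01 ~# vgs
root@openfiler01 ~# lvs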
3. Set up Corosync
3.1 Create the Corosync authkey (shared key for authenticating the two nodes)
root@openfiler01~# corosync-keygen
Wait for the command to finish gathering entropy; it can take a while. (Type on the machine's real console keyboard to generate entropy; keystrokes in an SSH session do not help.)
Copy the authkey file to the other node and restrict its file permissions:
root@openfiler01~# scp /etc/corosync/authkey root@openfiler02:/etc/corosync/authkey
root@openfiler02~# chmod 400 /etc/corosync/authkey
3.2 Create the pcmk service file /etc/corosync/service.d/pcmk
root@openfiler01~# vi /etc/corosync/service.d/pcmk
service {
# Load the Pacemaker Cluster Resource Manager
name: pacemaker
ver: 0
}
3.2.1 Copy it to openfiler02
root@openfiler01~# scp /etc/corosync/service.d/pcmk root@openfiler02:/etc/corosync/service.d/pcmk
3.3 Create the corosync.conf file and set bindnetaddr to match your heartbeat network
[root@openfiler01 ~]# cat /etc/corosync/corosync.conf
# Please read the corosync.conf.5 manual page
compatibility: whitetank
totem {
version: 2
secauth: off
threads: 0
interface {
ringnumber: 0
bindnetaddr: 10.10.5.0 (the network of the heartbeat link)
mcastaddr: 226.94.8.8 (multicast address; pick one in this range)
mcastport: 5405
ttl: 1
}
}
logging {
fileline: off
to_stderr: no
to_logfile: yes
to_syslog: yes
logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: AMF
debug: off
}
}
amf {
mode: disabled
}
3.3.1 Copy it to openfiler02
root@openfiler01~# scp /etc/corosync/corosync.conf root@openfiler02:/etc/corosync/corosync.conf
4. Prepare the nodes for Corosync control
First we are going to reboot the machines. Before that, disable the following services from starting at boot and enable corosync instead, because from now on Corosync will control them:
root@openfiler01~# chkconfig --level 2345 openfiler off
root@openfiler01~# chkconfig --level 2345 nfslock off
root@openfiler01~# chkconfig --level 2345 corosync on
Do the same on the second node:
root@openfiler02~# chkconfig --level 2345 openfiler off
root@openfiler02~# chkconfig --level 2345 nfslock off
root@openfiler02~# chkconfig --level 2345 corosync on
Then reboot both machines and wait...
4.1 Check whether Corosync started properly
root@openfiler01~# ps auxf
root 3480 0.0 0.8 534456 4112 ? Ssl 19:15 0:00 corosync
root 3486 0.0 0.5 68172 2776 ? S 19:15 0:00 \_ /usr/lib64/heartbeat/stonith
106 3487 0.0 1.0 67684 4956 ? S 19:15 0:00 \_ /usr/lib64/heartbeat/cib
root 3488 0.0 0.4 70828 2196 ? S 19:15 0:00 \_ /usr/lib64/heartbeat/lrmd
106 3489 0.0 0.6 68536 3096 ? S 19:15 0:00 \_ /usr/lib64/heartbeat/attrd
106 3490 0.0 0.6 69064 3420 ? S 19:15 0:00 \_ /usr/lib64/heartbeat/pengine
106 3491 0.0 0.7 76764 3488 ? S 19:15 0:00 \_ /usr/lib64/heartbeat/crmd
root@openfiler02~# crm_mon --one-shot -V
crm_mon[3602]: 2011/03/24_19:32:07 ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
crm_mon[3602]: 2011/03/24_19:32:07 ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
crm_mon[3602]: 2011/03/24_19:32:07 ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
============
Last updated: Thu Mar 24 19:32:07 2011
Stack: openais
Current DC: openfiler01 - partition with quorum
Version: 1.1.2-c6b59218ee949eebff30e837ff6f3824ed0ab86b
2 Nodes configured, 2 expected votes
0 Resources configured.
============
Online: [ openfiler01 openfiler02 ]
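If one of the nodes does not show up as online, it can help to look at the Corosync ring itself; corosync-cfgtool prints the local ring status, and the ring bound to 10.10.5.x should report no faults:
root@openfiler01~# corosync-cfgtool -s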
4.2 Configure the cluster resources
Before configuring anything, start monitoring the cluster status on openfiler02 so you can watch the resources come up:
root@openfiler02~# crm_mon
4.2.1 How to configure the cluster step by step
root@openfiler01~# crm configure
crm(live)configure# property stonith-enabled="false"
crm(live)configure# property no-quorum-policy="ignore"
crm(live)configure# rsc_defaults $id="rsc-options" resource-stickiness="100"
crm(live)configure# primitive ClusterIP ocf:heartbeat:IPaddr2 params ip="192.168.1.157" cidr_netmask="24" op monitor interval="30s"
crm(live)configure# primitive MetaFS ocf:heartbeat:Filesystem params device="/dev/drbd0" directory="/meta" fstype="ext3"
crm(live)configure# primitive lvmdata ocf:heartbeat:LVM params volgrpname="data"
crm(live)configure# primitive drbd_meta ocf:linbit:drbd params drbd_resource="meta" op monitor interval="15s"
crm(live)configure# primitive drbd_data ocf:linbit:drbd params drbd_resource="data" op monitor interval="15s"
crm(live)configure# primitive openfiler lsb:openfiler
crm(live)configure# primitive iscsi lsb:iscsi-target
crm(live)configure# primitive samba lsb:smb
crm(live)configure# primitive nfs lsb:nfs
crm(live)configure# primitive nfslock lsb:nfslock
crm(live)configure# group g_drbd drbd_meta drbd_data
crm(live)configure# group g_services MetaFS lvmdata openfiler ClusterIP iscsi samba nfs nfslock
crm(live)configure# ms ms_g_drbd g_drbd meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
crm(live)configure# colocation c_g_services_on_g_drbd inf: g_services ms_g_drbd:Master
crm(live)configure# order o_g_servicesafter_g_drbd inf: ms_g_drbd:promote g_services:start
crm(live)configure# commit
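Right after the commit you can also let Pacemaker check the live configuration for errors; crm_verify reports constraint and syntax problems in the CIB:
root@openfiler01~# crm_verify -L -V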
Now watch the crm_mon output; all the resources should start up.
root@openfiler01 ~# crm_mon
The commit may print warnings; these are not errors and can be ignored.
4.2.2 Troubleshooting
If you get errors because you ran commit before the configuration was complete, you need to do a resource cleanup, as in this example:
root@openfiler01~# crm
crm(live)# resource cleanup MetaFS
4.2.3 Verify the config
Verify your configuration by running:
[root@openfiler01 ~]# crm configure show
node openfiler01
node openfiler02
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.1.157" cidr_netmask="24" \
op monitor interval="30s"
primitive MetaFS ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/meta" fstype="ext3"
primitive drbd_data ocf:linbit:drbd \
params drbd_resource="data" \
op monitor interval="15s"
primitive drbd_meta ocf:linbit:drbd \
params drbd_resource="meta" \
op monitor interval="15s"
primitive iscsi lsb:iscsi-target
primitive lvmdata ocf:heartbeat:LVM \
params volgrpname="data"
primitive nfs lsb:nfs
primitive nfslock lsb:nfslock
primitive openfiler lsb:openfiler
primitive samba lsb:smb
group g_drbd drbd_meta drbd_data
group g_services MetaFS lvmdata openfiler ClusterIP iscsi samba nfs nfslock
ms ms_g_drbd g_drbd \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
colocation c_g_services_on_g_drbd inf: g_services ms_g_drbd:Master
order o_g_servicesafter_g_drbd inf: ms_g_drbd:promote g_services:start
property $id="cib-bootstrap-options" \
dc-version="1.1.2-c6b59218ee949eebff30e837ff6f3824ed0ab86b" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Then run crm_mon to check the status and confirm that the services started correctly:
Last updated: Mon Dec 17 10:40:54 2012
Stack: openais
Current DC: openfiler01 - partition with quorum
Version: 1.1.2-c6b59218ee949eebff30e837ff6f3824ed0ab86b
4 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ openfiler01 openfiler02 ]
Resource Group: g_services
MetaFS (ocf::heartbeat:Filesystem): Started openfiler01
lvmdata (ocf::heartbeat:LVM): Started openfiler01
openfiler (lsb:openfiler): Started openfiler01
ClusterIP (ocf::heartbeat:IPaddr2): Started openfiler01
iscsi (lsb:iscsi-target): Started openfiler01
samba (lsb:smb): Started openfiler01
nfs (lsb:nfs): Started openfiler01
nfslock (lsb:nfslock): Started openfiler01
Master/Slave Set: ms_g_drbd
Masters: [ openfiler01 ]
Slaves: [ openfiler02 ]
5. Simulating a heartbeat failure and recovering from it
5.1 Create a crontab job on the current primary node
Problem: DRBD split brain. When the heartbeat link goes down, both nodes can end up serving clients at the same time; Corosync's management of the services breaks down, both sides export the storage, and the data eventually becomes inconsistent.
In the setup above, openfiler01 is the active node. Add a crontab job on openfiler01 that checks whether its own NIC is down or the heartbeat link is broken; once it has verified that the fault is on its own side, it stops the cluster service and removes itself from the cluster.
[root@openfiler01 ~]# cat drbd.sh
#!/bin/bash
#env > /root/bbb
# Check that the heartbeat interface eth1 is still up
/sbin/ifconfig | grep eth1
if [ $? = 0 ]
then
    echo " $(date +%F/%H:%M) : eth1 OK"
else
    echo "$(date +%F/%H:%M): eth1 down, stopping service" >> /root/fail.txt
    pkill corosync
fi
# Check that the peer is reachable over the heartbeat link
ping -c1 10.10.5.156 &> /dev/null
if [ $? = 0 ]
then
    echo " $(date +%F/%H:%M) : ping OK"
else
    # Peer unreachable: ping the gateway to decide whose side the fault is on
    ping -c1 192.168.11.1 &> /dev/null
    if [ $? = 0 ]
    then
        # Gateway reachable but peer not: log an alert, the problem is on the peer side
        echo "$(date +%F/%H:%M) : gateway OK, peer failed" >> /root/alter.txt
    else
        # Gateway also unreachable: our own network is broken, leave the cluster
        echo "$(date +%F/%H:%M): ping failed, stopping service" >> /root/fail.txt
        pkill corosync
    fi
fi
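Before wiring the script into cron, it is worth running it once by hand while the heartbeat link is healthy and checking that it prints the two OK lines and does not kill corosync:
[root@openfiler01 ~]# sh /root/drbd.sh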
Add it to crontab so the check runs every minute:
[root@openfiler01 ~]# crontab -l
* * * * * sh /root/drbd.sh &
5.2 Failover test: take openfiler01 down
Take the heartbeat NIC down on openfiler01; the cron script detects the interface failure:
[root@openfiler01 ~]# ifdown eth1
openfiler01 then automatically stops the corosync service and leaves the cluster. At this point, checking
https://192.168.1.157:446
shows Openfiler still serving clients normally, and a continuous ping of the virtual IP (192.168.1.157) shows no timeouts, which is exactly the behaviour we want.
When openfiler01 has recovered, it has to be added back into the cluster.
The following steps are run on openfiler01.
First make openfiler01 the secondary:
[root@openfiler01 ~]# drbdadm secondary all (you can use the resource names defined in drbd.conf, or simply all)
[root@openfiler01 ~]# drbdadm disconnect all (disconnect the secondary; all works here as well)
[root@openfiler01 ~]# drbdadm -- --discard-my-data connect all (reconnect and discard local changes so the data resynchronizes from the peer)
If the DRBD status now shows Diskless/UpToDate, run the following to fix the Diskless state and resynchronize:
[root@openfiler01 ~]# drbdadm attach all (fixes the Diskless state)
[root@openfiler01 ~]# drbdadm invalidate all (resynchronizes all data from scratch; this can take a long time)
After these recovery steps openfiler01 rejoins the cluster; remember to start the corosync service on it manually.
The primary and secondary roles have now been swapped. If you want to fail back to openfiler01, take down the heartbeat NIC on openfiler02 and repeat the same procedure in the other direction.
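Breaking the heartbeat on purpose is one way to force the switch back. As an alternative (not part of the original procedure), crmsh can move the services without touching the network by putting the currently active node into standby and bringing it back online once the resources have migrated:
[root@openfiler01 ~]# crm node standby openfiler02
(wait until crm_mon shows all resources running on openfiler01)
[root@openfiler01 ~]# crm node online openfiler02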
Problem:
[root@openfiler02 cluster]# crm status
============
Last updated: Fri Jun 10 08:13:37 2016
Stack: openais
Current DC: openfiler01 - partition with quorum
Version: 1.1.2-c6b59218ee949eebff30e837ff6f3824ed0ab86b
2 Nodes configured, 2 expected votes
3 Resources configured.
============
Online: [ openfiler01 openfiler02 ]
Resource Group: g_services
MetaFS (ocf::heartbeat:Filesystem): Started openfiler02
lvmdata (ocf::heartbeat:LVM): Started openfiler02
openfiler (lsb:openfiler): Started openfiler02
ClusterIP (ocf::heartbeat:IPaddr2): Started openfiler02
iscsi (lsb:iscsi-target): Started openfiler02
samba (lsb:smb): Started openfiler02
nfs (lsb:nfs): Started openfiler02
Master/Slave Set: ms_g_drbd
Masters: [ openfiler02 ]
Slaves: [ openfiler01 ]
Failed actions:
nfslock_start_0 (node=openfiler02, call=16, rc=1, status=complete): unknown error
nfslock_start_0 (node=openfiler01, call=12, rc=1, status=complete): unknown error
Fix:
[root@openfiler02 ~]# crm configure edit
node openfiler01
node openfiler02
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.1.157" cidr_netmask="24" \
op monitor interval="30s"
primitive MetaFS ocf:heartbeat:Filesystem \
params device="/dev/drbd0" directory="/meta" fstype="ext3"
primitive drbd_data ocf:linbit:drbd \
params drbd_resource="data" \
op monitor interval="15s"
primitive drbd_meta ocf:linbit:drbd \
params drbd_resource="meta" \
op monitor interval="15s"
primitive iscsi lsb:iscsi-target
primitive lvmdata ocf:heartbeat:LVM \
params volgrpname="data"
primitive nfs lsb:nfs
primitive nfslock lsb:nfslock
primitive openfiler lsb:openfiler
primitive samba lsb:smb
group g_drbd drbd_meta drbd_data
group g_services MetaFS lvmdata openfiler ClusterIP iscsi samba nfs nfslock
Make sure nfslock is listed right after nfs in the g_services group line (as shown above), then save and commit:
:wq
commit
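After committing the corrected configuration, the old failed nfslock_start_0 actions remain listed in crm status until they are cleaned up; the same cleanup mechanism shown earlier clears them:
[root@openfiler02 ~]# crm resource cleanup nfslock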